In this week’s #TidyTuesday, my modelling goal is to build a predictive model that forecasts the next three months of change_yoy (the year-over-year change) in the United States Monthly State Retail Sales (MSRS).
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0.9000
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.2 ✔ rsample 1.1.1
## ✔ dials 1.1.0 ✔ tune 1.0.1
## ✔ infer 1.0.4 ✔ workflows 1.1.2
## ✔ modeldata 1.0.1 ✔ workflowsets 1.0.0
## ✔ parsnip 1.0.3 ✔ yardstick 1.1.0
## ✔ recipes 1.0.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(timetk)
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(modeltime)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
##
## The following object is masked from 'package:yardstick':
##
## accuracy
library(slider)
library(rules)
##
## Attaching package: 'rules'
##
## The following object is masked from 'package:dials':
##
## max_rules
# Download the TidyTuesday (2022-12-13) state retail file. The compact
# col_types string gives each column an explicit type (c = character,
# i = integer) instead of letting readr guess.
state_retail <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-12-13/state_retail.csv",
  col_types = "cciciiccc"
)
# Build the modelling series: one row per month holding the mean change_yoy
# over all rows of state_retail that fall in that month.
data <- state_retail %>%
  filter(!is.na(change_yoy)) %>%
  mutate(
    # change_yoy arrives as text; the "S" code is recoded to 0 before parsing.
    # NOTE(review): presumably "S" flags a suppressed figure -- confirm. As 0 it
    # pulls the monthly mean toward zero, whereas NA would be skipped by na.rm.
    change_yoy = ifelse(change_yoy == "S", 0, change_yoy),
    change_yoy = parse_number(change_yoy),
    # Anchor every observation to the first day of its month.
    date = lubridate::make_date(year = year, month = month, day = 1)
  ) %>%
  arrange(date) %>%
  summarise_by_time(
    .date_var = date,
    .by = "month",
    change_yoy = mean(change_yoy, na.rm = TRUE)
  ) %>%
  # Insert NA rows for any month missing from the sequence.
  pad_by_time(.date_var = date, .by = "month", .pad_value = NA) %>%
  # Keep March 2019 through the end of the series.
  filter_by_time(
    .date_var = date,
    .start_date = "2019-03-01",
    .end_date = "end"
  ) %>%
  print()
## # A tibble: 42 × 2
## date change_yoy
## <date> <dbl>
## 1 2019-03-01 -1.18
## 2 2019-04-01 3.62
## 3 2019-05-01 0.0904
## 4 2019-06-01 -1.02
## 5 2019-07-01 3.01
## 6 2019-08-01 2.03
## 7 2019-09-01 1.60
## 8 2019-10-01 1.91
## 9 2019-11-01 1.20
## 10 2019-12-01 2.86
## # … with 32 more rows
There are no missing values left.
# Sanity check: list any month whose averaged change_yoy is still NA.
filter(data, is.na(change_yoy))
## # A tibble: 0 × 2
## # … with 2 variables: date <date>, change_yoy <dbl>
Here is a plot of the prepared data:
# Static ggplot of the monthly series (no smoother), converted to an
# interactive plotly widget afterwards.
p <- plot_time_series(
  data,
  .date_var = date,
  .value = change_yoy,
  .smooth = FALSE,
  .interactive = FALSE
) +
  labs(title = "United States Monthly State Retail Sales (MSRS)")
ggplotly(p)
Some diagnostics:
# Summarise the date index: observation count, span and spacing.
tk_summary_diagnostics(data, .date_var = date)
## # A tibble: 1 × 12
## n.obs start end units scale tzone diff.m…¹ diff.q1 diff.…² diff.…³
## <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 42 2019-03-01 2022-08-01 days month UTC 2419200 2592000 2678400 2.63e6
## # … with 2 more variables: diff.q3 <dbl>, diff.maximum <dbl>, and abbreviated
## # variable names ¹diff.minimum, ²diff.median, ³diff.mean
# Seasonal diagnostics of the raw series.
plot_seasonal_diagnostics(data, .date_var = date, .value = change_yoy)
# Same seasonal view on a log-transformed, standardized copy of the series.
# NOTE(review): the +1 offset is too small for this series -- log() returns
# NaN for some months (see the warning printed below) and the filter then
# drops them, so this plot covers fewer months than the raw one. Later
# sections use an offset of 27 instead.
data %>%
  mutate(
    trans_change_yoy = standardize_vec(log(change_yoy + 1))
  ) %>%
  filter(!is.na(trans_change_yoy)) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = trans_change_yoy)
## Warning in log(change_yoy + 1): NaNs produced
## Standardization Parameters
## mean: 1.95022029388792
## standard deviation: 0.988547965666905
# STL decomposition of the raw series (quarterly seasonality, yearly trend),
# re-faceted into two columns before conversion to plotly.
p <- plot_stl_diagnostics(
  data,
  .date_var = date,
  .value = change_yoy,
  .frequency = "1 quarter",
  .trend = "1 year",
  .interactive = FALSE
) +
  facet_wrap(facets = ".group", ncol = 2)
## frequency = 3 observations per 1 quarter
## trend = 12 observations per 1 year
ggplotly(p)
# STL decomposition of the log-transformed, standardized series.
# NOTE(review): log(change_yoy + 1) yields NaN for some months (warning
# below); those rows are filtered out before the decomposition.
p <- data %>%
  mutate(
    trans_change_yoy = standardize_vec(log(change_yoy + 1))
  ) %>%
  filter(!is.na(trans_change_yoy)) %>%
  plot_stl_diagnostics(
    .date_var = date,
    .value = trans_change_yoy,
    .frequency = "1 quarter",
    .trend = "1 year",
    .interactive = FALSE
  ) +
  facet_wrap(facets = ".group", ncol = 2)
## Warning in log(change_yoy + 1): NaNs produced
## Standardization Parameters
## mean: 1.95022029388792
## standard deviation: 0.988547965666905
## frequency = 3 observations per 1 quarter
## trend = 9 observations per 1 year
ggplotly(p)
# ACF/PACF of the raw series; the oversized .lags request is capped to the
# available data (see the message below).
plot_acf_diagnostics(data, .date_var = date, .value = change_yoy, .lags = 1000)
## Max lag exceeds data available. Using max lag: 41
# ACF/PACF of the log-transformed, standardized series.
# NOTE(review): months where log(change_yoy + 1) is NaN are dropped first,
# which is why fewer lags are available here than for the raw series.
data %>%
  mutate(
    trans_change_yoy = standardize_vec(log(change_yoy + 1))
  ) %>%
  filter(!is.na(trans_change_yoy)) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value = trans_change_yoy,
    .lags = 1000
  )
## Warning in log(change_yoy + 1): NaNs produced
## Standardization Parameters
## mean: 1.95022029388792
## standard deviation: 0.988547965666905
## Max lag exceeds data available. Using max lag: 35
# Flag anomalous months in the raw series, allowing at most 10% of the
# observations to be marked.
plot_anomaly_diagnostics(
  data,
  .date_var = date,
  .value = change_yoy,
  .alpha = 0.02,
  .max_anomalies = 0.1
)
## frequency = 11 observations per 1 year
## trend = 12 observations per 1 year
Outlier effect before log transformation:
# Baseline calendar-feature regression on the raw (untransformed) series.
# The model is fitted to change_yoy itself, so no shifted helper column is
# created here (the original built trans_change_yoy but never used it).
# The calendar terms overlap: the summary below reports 2 coefficients as
# not defined because of singularities.
data %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = change_yoy ~ as.numeric(date) +
      year(date) +
      semester(date) +
      quarter(date) +
      month(date, label = TRUE),
    .show_summary = TRUE
  )
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.289 -4.576 -0.811 2.498 48.192
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.728e+06 5.908e+06 -0.631 0.533
## as.numeric(date) -5.165e+00 8.210e+00 -0.629 0.534
## year(date) 1.893e+03 3.000e+03 0.631 0.533
## semester(date) -1.139e+01 3.571e+01 -0.319 0.752
## quarter(date) 4.890e+00 1.789e+01 0.273 0.787
## month(date, label = TRUE).L 1.883e+03 2.994e+03 0.629 0.534
## month(date, label = TRUE).Q 6.351e+00 1.399e+01 0.454 0.653
## month(date, label = TRUE).C -6.142e+00 2.209e+01 -0.278 0.783
## month(date, label = TRUE)^4 -5.196e+00 8.572e+00 -0.606 0.549
## month(date, label = TRUE)^5 6.112e+00 2.151e+01 0.284 0.778
## month(date, label = TRUE)^6 5.903e-01 9.376e+00 0.063 0.950
## month(date, label = TRUE)^7 -6.410e-01 1.211e+01 -0.053 0.958
## month(date, label = TRUE)^8 -4.781e+00 1.033e+01 -0.463 0.647
## month(date, label = TRUE)^9 NA NA NA NA
## month(date, label = TRUE)^10 -5.883e+00 8.820e+00 -0.667 0.510
## month(date, label = TRUE)^11 NA NA NA NA
##
## Residual standard error: 14.56 on 28 degrees of freedom
## Multiple R-squared: 0.1972, Adjusted R-squared: -0.1755
## F-statistic: 0.5291 on 13 and 28 DF, p-value: 0.8866
Cleaning before log transformation
# Shift the series by 27 so every value is positive, clean it with
# ts_clean_vec(), and plot the shifted and cleaned series together.
data %>%
  mutate(
    trans_change_yoy = change_yoy + 27,
    change_yoy_cleaned = ts_clean_vec(
      trans_change_yoy,
      period = 1,
      lambda = "auto"
    )
  ) %>%
  select(-change_yoy) %>%
  pivot_longer(cols = -date) %>%
  plot_time_series(
    .date_var = date,
    .value = value,
    .color_var = name,
    .smooth = FALSE
  )
Outlier effect after log transformation
# Same calendar-feature regression, now on log(change_yoy + 27); the offset
# keeps the argument of log() positive for every month.
data %>%
  mutate(trans_change_yoy = log(change_yoy + 27)) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = trans_change_yoy ~ as.numeric(date) +
      year(date) +
      semester(date) +
      quarter(date) +
      month(date, label = TRUE),
    .show_summary = TRUE
  )
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33018 -0.11023 0.02742 0.10710 1.16591
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.583e+05 1.727e+05 -1.496 0.146
## as.numeric(date) -3.585e-01 2.400e-01 -1.494 0.146
## year(date) 1.311e+02 8.767e+01 1.496 0.146
## semester(date) -3.377e-01 1.044e+00 -0.324 0.749
## quarter(date) -4.183e-02 5.229e-01 -0.080 0.937
## month(date, label = TRUE).L 1.314e+02 8.750e+01 1.502 0.144
## month(date, label = TRUE).Q 5.623e-01 4.089e-01 1.375 0.180
## month(date, label = TRUE).C -6.369e-01 6.457e-01 -0.986 0.332
## month(date, label = TRUE)^4 -1.354e-01 2.505e-01 -0.541 0.593
## month(date, label = TRUE)^5 3.038e-01 6.286e-01 0.483 0.633
## month(date, label = TRUE)^6 -2.580e-01 2.740e-01 -0.942 0.354
## month(date, label = TRUE)^7 1.739e-01 3.538e-01 0.491 0.627
## month(date, label = TRUE)^8 -2.315e-01 3.020e-01 -0.767 0.450
## month(date, label = TRUE)^9 NA NA NA NA
## month(date, label = TRUE)^10 -1.841e-01 2.578e-01 -0.714 0.481
## month(date, label = TRUE)^11 NA NA NA NA
##
## Residual standard error: 0.4254 on 28 degrees of freedom
## Multiple R-squared: 0.2529, Adjusted R-squared: -0.09399
## F-statistic: 0.729 on 13 and 28 DF, p-value: 0.7209
Cleaning after log transformation
# Clean the log-transformed series with ts_clean_vec() and plot the
# transformed and cleaned versions together.
data %>%
  mutate(
    trans_change_yoy = log(change_yoy + 27),
    change_yoy_cleaned = ts_clean_vec(
      trans_change_yoy,
      period = 1,
      lambda = "auto"
    )
  ) %>%
  select(-change_yoy) %>%
  pivot_longer(cols = -date) %>%
  plot_time_series(
    .date_var = date,
    .value = value,
    .color_var = name,
    .smooth = FALSE
  )
Data wrangling:
# Attach 3-month summary statistics (mean, median, max, min) to every row
# and plot each of them, alongside the raw series, in its own facet.
data %>%
  mutate_by_time(
    .date_var = date,
    .by = "3 month",
    change_yoy_mean = mean(change_yoy),
    change_yoy_median = median(change_yoy),
    change_yoy_max = max(change_yoy),
    change_yoy_min = min(change_yoy)
  ) %>%
  pivot_longer(cols = change_yoy:change_yoy_min) %>%
  plot_time_series(
    .date_var = date,
    .value = value,
    .facet_vars = name,
    .facet_ncol = 2
  )
# 3-month summary statistics of the log-transformed, standardized series.
# NOTE(review): log(change_yoy + 1) is NaN for some months (warning below);
# those rows are removed before the statistics are computed.
data %>%
  mutate(
    trans_change_yoy = standardize_vec(log(change_yoy + 1))
  ) %>%
  filter(!is.na(trans_change_yoy)) %>%
  mutate_by_time(
    .date_var = date,
    .by = "3 month",
    change_yoy_mean = mean(trans_change_yoy),
    change_yoy_median = median(trans_change_yoy),
    change_yoy_max = max(trans_change_yoy),
    change_yoy_min = min(trans_change_yoy)
  ) %>%
  # The column range also picks up trans_change_yoy, which sits between
  # change_yoy and the summary columns.
  pivot_longer(cols = change_yoy:change_yoy_min) %>%
  plot_time_series(
    .date_var = date,
    .value = value,
    .facet_vars = name,
    .facet_ncol = 2
  )
## Warning in log(change_yoy + 1): NaNs produced
## Standardization Parameters
## mean: 1.95022029388792
## standard deviation: 0.988547965666905
# Calendar-feature regression on the standardized log(change_yoy + 1) series.
# NOTE(review): months where the log is NaN are dropped first, so this model
# is fitted on fewer observations than the other regressions (compare the
# residual degrees of freedom in the summaries).
data %>%
  mutate(
    trans_change_yoy = standardize_vec(log(change_yoy + 1))
  ) %>%
  filter(!is.na(trans_change_yoy)) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = trans_change_yoy ~ as.numeric(date) +
      year(date) +
      semester(date) +
      quarter(date) +
      month(date, label = TRUE),
    .show_summary = TRUE
  )
## Warning in log(change_yoy + 1): NaNs produced
## Standardization Parameters
## mean: 1.95022029388792
## standard deviation: 0.988547965666905
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75233 -0.41649 0.00902 0.48326 1.45034
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.922e+04 4.718e+05 0.147 0.885
## as.numeric(date) 9.773e-02 6.556e-01 0.149 0.883
## year(date) -3.516e+01 2.395e+02 -0.147 0.885
## semester(date) 7.228e-01 2.710e+00 0.267 0.792
## quarter(date) -3.495e-01 1.331e+00 -0.263 0.795
## month(date, label = TRUE).L -3.512e+01 2.392e+02 -0.147 0.885
## month(date, label = TRUE).Q 2.650e-02 1.070e+00 0.025 0.980
## month(date, label = TRUE).C 8.831e-01 1.786e+00 0.494 0.626
## month(date, label = TRUE)^4 -3.822e-01 6.053e-01 -0.631 0.534
## month(date, label = TRUE)^5 -7.680e-01 1.682e+00 -0.457 0.652
## month(date, label = TRUE)^6 4.408e-01 7.079e-01 0.623 0.540
## month(date, label = TRUE)^7 4.573e-03 8.775e-01 0.005 0.996
## month(date, label = TRUE)^8 2.506e-01 7.658e-01 0.327 0.747
## month(date, label = TRUE)^9 NA NA NA NA
## month(date, label = TRUE)^10 -4.432e-01 6.713e-01 -0.660 0.516
## month(date, label = TRUE)^11 NA NA NA NA
##
## Residual standard error: 0.966 on 22 degrees of freedom
## Multiple R-squared: 0.4135, Adjusted R-squared: 0.06687
## F-statistic: 1.193 on 13 and 22 DF, p-value: 0.3458
# Regression on a Box-Cox transformed, standardized series. semester(date)
# and quarter(date) are deliberately left out of this formula; without them
# the summary below reports no undefined coefficients.
# NOTE(review): box_cox_vec() is applied to change_yoy without an offset and
# warns that lambda selection is meant for strictly positive data.
data %>%
  mutate(
    trans_change_yoy = standardize_vec(box_cox_vec(change_yoy))
  ) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = trans_change_yoy ~ as.numeric(date) +
      year(date) +
      month(date, label = TRUE),
    .show_summary = TRUE
  )
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
## box_cox_vec(): Using value for lambda: 0.577882582876949
## Standardization Parameters
## mean: 2.48304605893425
## standard deviation: 5.34313693909691
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.10190 -0.47713 0.04368 0.29153 2.56771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.020e+05 4.020e+05 -1.498 0.145
## as.numeric(date) -8.352e-01 5.586e-01 -1.495 0.146
## year(date) 3.056e+02 2.041e+02 1.498 0.145
## month(date, label = TRUE).L 3.047e+02 2.034e+02 1.498 0.145
## month(date, label = TRUE).Q 1.331e+00 9.519e-01 1.398 0.173
## month(date, label = TRUE).C -9.358e-01 7.581e-01 -1.234 0.227
## month(date, label = TRUE)^4 -3.052e-01 5.832e-01 -0.523 0.605
## month(date, label = TRUE)^5 4.224e-01 5.790e-01 0.730 0.472
## month(date, label = TRUE)^6 -3.206e-01 6.379e-01 -0.503 0.619
## month(date, label = TRUE)^7 7.250e-01 6.281e-01 1.154 0.258
## month(date, label = TRUE)^8 -6.571e-01 7.030e-01 -0.935 0.358
## month(date, label = TRUE)^9 7.207e-02 5.267e-01 0.137 0.892
## month(date, label = TRUE)^10 -7.754e-01 6.001e-01 -1.292 0.207
## month(date, label = TRUE)^11 2.941e-01 5.483e-01 0.536 0.596
##
## Residual standard error: 0.9903 on 28 degrees of freedom
## Multiple R-squared: 0.3303, Adjusted R-squared: 0.01929
## F-statistic: 1.062 on 13 and 28 DF, p-value: 0.427
Exploring moving average:
# Right-aligned moving averages over 3, 6 and 12 months, extended over a
# 3-month future frame. The future rows have no change_yoy, so the last
# available average is carried forward with fill().
# The title names all three windows that are plotted (it previously
# mentioned only the 3-month one).
p <- data %>%
  bind_rows(
    future_frame(., .date_var = date, .length_out = 3)
  ) %>%
  mutate(
    mavg_3 = slidify_vec(change_yoy, .f = ~ mean(.x, na.rm = TRUE), .period = 3, .align = "right"),
    mavg_6 = slidify_vec(change_yoy, .f = ~ mean(.x, na.rm = TRUE), .period = 6, .align = "right"),
    mavg_12 = slidify_vec(change_yoy, .f = ~ mean(.x, na.rm = TRUE), .period = 12, .align = "right")
  ) %>%
  fill(mavg_3, mavg_6, mavg_12, .direction = "down") %>%
  pivot_longer(-date) %>%
  plot_time_series(date, value, name, .smooth = FALSE, .interactive = FALSE) +
  labs(title = "Moving averages: 3, 6 and 12 months")
ggplotly(p)
Exploring lag features:
# Regress the log-shifted series on its own 1-, 3- and 12-month lags.
# drop_na() removes the leading rows for which a lag is not yet available.
data %>%
  tk_augment_lags(.value = change_yoy, .lags = c(1, 3, 12)) %>%
  drop_na() %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = log(change_yoy + 27) ~
      log(change_yoy_lag1 + 27) +
      log(change_yoy_lag3 + 27) +
      log(change_yoy_lag12 + 27),
    .show_summary = TRUE
  )
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.43942 -0.06644 0.06176 0.14951 0.63253
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1000 0.8378 2.507 0.01877 *
## log(change_yoy_lag1 + 27) 0.5602 0.1522 3.681 0.00107 **
## log(change_yoy_lag3 + 27) 0.1003 0.1499 0.669 0.50954
## log(change_yoy_lag12 + 27) -0.2649 0.1501 -1.766 0.08922 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3643 on 26 degrees of freedom
## Multiple R-squared: 0.4715, Adjusted R-squared: 0.4105
## F-statistic: 7.731 on 3 and 26 DF, p-value: 0.0007496
Exploring Fourier features:
# Add sine/cosine Fourier terms (periods 1, 3 and 12; orders 1 to 3) and
# regress the log-shifted series on a linear time index plus every added
# column. `. - date` expands to all columns except the raw date.
data %>%
  tk_augment_fourier(
    .date_var = date,
    .periods = c(1, 3, 12),
    .K = 3
  ) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = log(change_yoy + 27) ~ as.numeric(date) + . - date,
    .show_summary = TRUE
  )
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.90892 -0.11917 -0.01495 0.09837 0.68279
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.730370 44.344364 1.234 0.2291
## as.numeric(date) -0.002756 0.002381 -1.158 0.2584
## date_sin1_K1 -1.318896 0.847028 -1.557 0.1325
## date_cos1_K1 -0.952460 0.846672 -1.125 0.2717
## date_sin1_K2 -0.035435 0.254183 -0.139 0.8903
## date_cos1_K2 0.315409 0.423853 0.744 0.4640
## date_sin1_K3 0.099606 0.169084 0.589 0.5613
## date_cos1_K3 -0.346224 0.140266 -2.468 0.0211 *
## date_sin3_K1 -0.009917 0.076277 -0.130 0.8976
## date_cos3_K1 -0.016490 0.076878 -0.214 0.8320
## date_sin3_K2 0.047116 0.078072 0.603 0.5518
## date_cos3_K2 0.034238 0.077465 0.442 0.6625
## date_sin3_K3 NA NA NA NA
## date_cos3_K3 NA NA NA NA
## date_sin12_K1 -0.089879 0.084590 -1.063 0.2986
## date_cos12_K1 0.143347 0.116145 1.234 0.2291
## date_sin12_K2 0.094115 0.081031 1.161 0.2569
## date_cos12_K2 0.021780 0.071322 0.305 0.7627
## date_sin12_K3 0.043003 0.079099 0.544 0.5917
## date_cos12_K3 -0.081315 0.075357 -1.079 0.2913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3161 on 24 degrees of freedom
## Multiple R-squared: 0.6464, Adjusted R-squared: 0.3959
## F-statistic: 2.581 on 17 and 24 DF, p-value: 0.01648
# Plot the series after a log-interval transformation. The limits bracket
# the shifted series (change_yoy + 27).
data %>%
  plot_time_series(
    .date_var = date,
    .value = log_interval_vec(
      change_yoy,
      limit_lower = 0,   # min(change_yoy) + 27 is 0.460119
      limit_upper = 116, # max(change_yoy) + 27 is 115.5917
      offset = 27
    )
  )
## log_interval_vec():
## Using limit_lower: 0
## Using limit_upper: 116
## Using offset: 27
Explore special dates feature:
# Monthly calendar covering the modelling window, with a 0/1 `special`
# indicator for March-May 2020 and March-May 2021, joined to the series.
special_dates_tbl <- tibble(
  date = tk_make_timeseries(
    start_date = "2019-03-01",
    end_date = "2022-08-01",
    by = "month"
  )
) %>%
  mutate(
    special = as.numeric(
      between_time(date, "2020-03-01", "2020-05-01") |
        between_time(date, "2021-03-01", "2021-05-01")
    )
  ) %>%
  left_join(data, by = "date")
# Overlay the indicator on the series; it is scaled by 100 for plotting only.
special_dates_tbl %>%
  mutate(special = special * 100) %>%
  pivot_longer(cols = -date) %>%
  plot_time_series(
    .date_var = date,
    .value = value,
    .color_var = name,
    .smooth = FALSE
  )
# Model specification: a natural spline on the numeric time index (knots at
# its 25% and 40% quantiles), every remaining column via `.`, and a
# month-factor by month-label interaction. Term order is kept as written
# because it determines the order of coefficients in the summary.
# NOTE(review): the fitted summary reports 137 coefficients as not defined
# because of singularities -- the interaction looks redundant with the
# columns already pulled in by `.`; consider dropping it.
model_formula <-
  log(change_yoy + 27) ~
    splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.40))) +
    . +
    (as.factor(month) * month.lbl)
# Expand the date into timetk's calendar signature, discard the columns
# matched by the selectors below, and fit model_formula on what is left.
special_dates_tbl %>%
  tk_augment_timeseries_signature(.date_var = date) %>%
  select(
    -diff,
    -ends_with("iso"),
    -ends_with(".xts"),
    -contains("week"),
    -contains("day"),
    -contains("hour"),
    -contains("minute"),
    -contains("second"),
    -contains("am.pm")
  ) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = model_formula,
    .show_summary = TRUE
  )
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86752 -0.07446 0.00265 0.06804 0.56576
##
## Coefficients: (137 not defined because of singularities)
## Estimate
## (Intercept) -1.167e+06
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))1 -9.193e+02
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))2 -1.956e+03
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))3 -1.768e+03
## date NA
## special 2.585e-01
## index.num NA
## year 5.779e+02
## half -2.812e+00
## quarter 8.446e-01
## month 4.826e+01
## month.lbl.L NA
## month.lbl.Q 2.160e+00
## month.lbl.C -3.031e+00
## month.lbl^4 -4.186e-01
## month.lbl^5 2.062e+00
## month.lbl^6 -1.111e+00
## month.lbl^7 1.487e-01
## month.lbl^8 -1.248e+00
## month.lbl^9 NA
## month.lbl^10 -8.330e-01
## month.lbl^11 NA
## as.factor(month)2 NA
## as.factor(month)3 NA
## as.factor(month)4 NA
## as.factor(month)5 NA
## as.factor(month)6 NA
## as.factor(month)7 NA
## as.factor(month)8 NA
## as.factor(month)9 NA
## as.factor(month)10 NA
## as.factor(month)11 NA
## as.factor(month)12 NA
## month.lbl.L:as.factor(month)2 NA
## month.lbl.Q:as.factor(month)2 NA
## month.lbl.C:as.factor(month)2 NA
## month.lbl^4:as.factor(month)2 NA
## month.lbl^5:as.factor(month)2 NA
## month.lbl^6:as.factor(month)2 NA
## month.lbl^7:as.factor(month)2 NA
## month.lbl^8:as.factor(month)2 NA
## month.lbl^9:as.factor(month)2 NA
## month.lbl^10:as.factor(month)2 NA
## month.lbl^11:as.factor(month)2 NA
## month.lbl.L:as.factor(month)3 NA
## month.lbl.Q:as.factor(month)3 NA
## month.lbl.C:as.factor(month)3 NA
## month.lbl^4:as.factor(month)3 NA
## month.lbl^5:as.factor(month)3 NA
## month.lbl^6:as.factor(month)3 NA
## month.lbl^7:as.factor(month)3 NA
## month.lbl^8:as.factor(month)3 NA
## month.lbl^9:as.factor(month)3 NA
## month.lbl^10:as.factor(month)3 NA
## month.lbl^11:as.factor(month)3 NA
## month.lbl.L:as.factor(month)4 NA
## month.lbl.Q:as.factor(month)4 NA
## month.lbl.C:as.factor(month)4 NA
## month.lbl^4:as.factor(month)4 NA
## month.lbl^5:as.factor(month)4 NA
## month.lbl^6:as.factor(month)4 NA
## month.lbl^7:as.factor(month)4 NA
## month.lbl^8:as.factor(month)4 NA
## month.lbl^9:as.factor(month)4 NA
## month.lbl^10:as.factor(month)4 NA
## month.lbl^11:as.factor(month)4 NA
## month.lbl.L:as.factor(month)5 NA
## month.lbl.Q:as.factor(month)5 NA
## month.lbl.C:as.factor(month)5 NA
## month.lbl^4:as.factor(month)5 NA
## month.lbl^5:as.factor(month)5 NA
## month.lbl^6:as.factor(month)5 NA
## month.lbl^7:as.factor(month)5 NA
## month.lbl^8:as.factor(month)5 NA
## month.lbl^9:as.factor(month)5 NA
## month.lbl^10:as.factor(month)5 NA
## month.lbl^11:as.factor(month)5 NA
## month.lbl.L:as.factor(month)6 NA
## month.lbl.Q:as.factor(month)6 NA
## month.lbl.C:as.factor(month)6 NA
## month.lbl^4:as.factor(month)6 NA
## month.lbl^5:as.factor(month)6 NA
## month.lbl^6:as.factor(month)6 NA
## month.lbl^7:as.factor(month)6 NA
## month.lbl^8:as.factor(month)6 NA
## month.lbl^9:as.factor(month)6 NA
## month.lbl^10:as.factor(month)6 NA
## month.lbl^11:as.factor(month)6 NA
## month.lbl.L:as.factor(month)7 NA
## month.lbl.Q:as.factor(month)7 NA
## month.lbl.C:as.factor(month)7 NA
## month.lbl^4:as.factor(month)7 NA
## month.lbl^5:as.factor(month)7 NA
## month.lbl^6:as.factor(month)7 NA
## month.lbl^7:as.factor(month)7 NA
## month.lbl^8:as.factor(month)7 NA
## month.lbl^9:as.factor(month)7 NA
## month.lbl^10:as.factor(month)7 NA
## month.lbl^11:as.factor(month)7 NA
## month.lbl.L:as.factor(month)8 NA
## month.lbl.Q:as.factor(month)8 NA
## month.lbl.C:as.factor(month)8 NA
## month.lbl^4:as.factor(month)8 NA
## month.lbl^5:as.factor(month)8 NA
## month.lbl^6:as.factor(month)8 NA
## month.lbl^7:as.factor(month)8 NA
## month.lbl^8:as.factor(month)8 NA
## month.lbl^9:as.factor(month)8 NA
## month.lbl^10:as.factor(month)8 NA
## month.lbl^11:as.factor(month)8 NA
## month.lbl.L:as.factor(month)9 NA
## month.lbl.Q:as.factor(month)9 NA
## month.lbl.C:as.factor(month)9 NA
## month.lbl^4:as.factor(month)9 NA
## month.lbl^5:as.factor(month)9 NA
## month.lbl^6:as.factor(month)9 NA
## month.lbl^7:as.factor(month)9 NA
## month.lbl^8:as.factor(month)9 NA
## month.lbl^9:as.factor(month)9 NA
## month.lbl^10:as.factor(month)9 NA
## month.lbl^11:as.factor(month)9 NA
## month.lbl.L:as.factor(month)10 NA
## month.lbl.Q:as.factor(month)10 NA
## month.lbl.C:as.factor(month)10 NA
## month.lbl^4:as.factor(month)10 NA
## month.lbl^5:as.factor(month)10 NA
## month.lbl^6:as.factor(month)10 NA
## month.lbl^7:as.factor(month)10 NA
## month.lbl^8:as.factor(month)10 NA
## month.lbl^9:as.factor(month)10 NA
## month.lbl^10:as.factor(month)10 NA
## month.lbl^11:as.factor(month)10 NA
## month.lbl.L:as.factor(month)11 NA
## month.lbl.Q:as.factor(month)11 NA
## month.lbl.C:as.factor(month)11 NA
## month.lbl^4:as.factor(month)11 NA
## month.lbl^5:as.factor(month)11 NA
## month.lbl^6:as.factor(month)11 NA
## month.lbl^7:as.factor(month)11 NA
## month.lbl^8:as.factor(month)11 NA
## month.lbl^9:as.factor(month)11 NA
## month.lbl^10:as.factor(month)11 NA
## month.lbl^11:as.factor(month)11 NA
## month.lbl.L:as.factor(month)12 NA
## month.lbl.Q:as.factor(month)12 NA
## month.lbl.C:as.factor(month)12 NA
## month.lbl^4:as.factor(month)12 NA
## month.lbl^5:as.factor(month)12 NA
## month.lbl^6:as.factor(month)12 NA
## month.lbl^7:as.factor(month)12 NA
## month.lbl^8:as.factor(month)12 NA
## month.lbl^9:as.factor(month)12 NA
## month.lbl^10:as.factor(month)12 NA
## month.lbl^11:as.factor(month)12 NA
## Std. Error
## (Intercept) 2.155e+05
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))1 1.699e+02
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))2 3.610e+02
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))3 3.265e+02
## date NA
## special 2.185e-01
## index.num NA
## year 1.067e+02
## half 8.368e-01
## quarter 3.820e-01
## month 8.904e+00
## month.lbl.L NA
## month.lbl.Q 4.037e-01
## month.lbl.C 6.297e-01
## month.lbl^4 1.836e-01
## month.lbl^5 5.340e-01
## month.lbl^6 2.443e-01
## month.lbl^7 2.330e-01
## month.lbl^8 2.808e-01
## month.lbl^9 NA
## month.lbl^10 2.132e-01
## month.lbl^11 NA
## as.factor(month)2 NA
## as.factor(month)3 NA
## as.factor(month)4 NA
## as.factor(month)5 NA
## as.factor(month)6 NA
## as.factor(month)7 NA
## as.factor(month)8 NA
## as.factor(month)9 NA
## as.factor(month)10 NA
## as.factor(month)11 NA
## as.factor(month)12 NA
## month.lbl.L:as.factor(month)2 NA
## month.lbl.Q:as.factor(month)2 NA
## month.lbl.C:as.factor(month)2 NA
## month.lbl^4:as.factor(month)2 NA
## month.lbl^5:as.factor(month)2 NA
## month.lbl^6:as.factor(month)2 NA
## month.lbl^7:as.factor(month)2 NA
## month.lbl^8:as.factor(month)2 NA
## month.lbl^9:as.factor(month)2 NA
## month.lbl^10:as.factor(month)2 NA
## month.lbl^11:as.factor(month)2 NA
## month.lbl.L:as.factor(month)3 NA
## month.lbl.Q:as.factor(month)3 NA
## month.lbl.C:as.factor(month)3 NA
## month.lbl^4:as.factor(month)3 NA
## month.lbl^5:as.factor(month)3 NA
## month.lbl^6:as.factor(month)3 NA
## month.lbl^7:as.factor(month)3 NA
## month.lbl^8:as.factor(month)3 NA
## month.lbl^9:as.factor(month)3 NA
## month.lbl^10:as.factor(month)3 NA
## month.lbl^11:as.factor(month)3 NA
## month.lbl.L:as.factor(month)4 NA
## month.lbl.Q:as.factor(month)4 NA
## month.lbl.C:as.factor(month)4 NA
## month.lbl^4:as.factor(month)4 NA
## month.lbl^5:as.factor(month)4 NA
## month.lbl^6:as.factor(month)4 NA
## month.lbl^7:as.factor(month)4 NA
## month.lbl^8:as.factor(month)4 NA
## month.lbl^9:as.factor(month)4 NA
## month.lbl^10:as.factor(month)4 NA
## month.lbl^11:as.factor(month)4 NA
## month.lbl.L:as.factor(month)5 NA
## month.lbl.Q:as.factor(month)5 NA
## month.lbl.C:as.factor(month)5 NA
## month.lbl^4:as.factor(month)5 NA
## month.lbl^5:as.factor(month)5 NA
## month.lbl^6:as.factor(month)5 NA
## month.lbl^7:as.factor(month)5 NA
## month.lbl^8:as.factor(month)5 NA
## month.lbl^9:as.factor(month)5 NA
## month.lbl^10:as.factor(month)5 NA
## month.lbl^11:as.factor(month)5 NA
## month.lbl.L:as.factor(month)6 NA
## month.lbl.Q:as.factor(month)6 NA
## month.lbl.C:as.factor(month)6 NA
## month.lbl^4:as.factor(month)6 NA
## month.lbl^5:as.factor(month)6 NA
## month.lbl^6:as.factor(month)6 NA
## month.lbl^7:as.factor(month)6 NA
## month.lbl^8:as.factor(month)6 NA
## month.lbl^9:as.factor(month)6 NA
## month.lbl^10:as.factor(month)6 NA
## month.lbl^11:as.factor(month)6 NA
## month.lbl.L:as.factor(month)7 NA
## month.lbl.Q:as.factor(month)7 NA
## month.lbl.C:as.factor(month)7 NA
## month.lbl^4:as.factor(month)7 NA
## month.lbl^5:as.factor(month)7 NA
## month.lbl^6:as.factor(month)7 NA
## month.lbl^7:as.factor(month)7 NA
## month.lbl^8:as.factor(month)7 NA
## month.lbl^9:as.factor(month)7 NA
## month.lbl^10:as.factor(month)7 NA
## month.lbl^11:as.factor(month)7 NA
## month.lbl.L:as.factor(month)8 NA
## month.lbl.Q:as.factor(month)8 NA
## month.lbl.C:as.factor(month)8 NA
## month.lbl^4:as.factor(month)8 NA
## month.lbl^5:as.factor(month)8 NA
## month.lbl^6:as.factor(month)8 NA
## month.lbl^7:as.factor(month)8 NA
## month.lbl^8:as.factor(month)8 NA
## month.lbl^9:as.factor(month)8 NA
## month.lbl^10:as.factor(month)8 NA
## month.lbl^11:as.factor(month)8 NA
## month.lbl.L:as.factor(month)9 NA
## month.lbl.Q:as.factor(month)9 NA
## month.lbl.C:as.factor(month)9 NA
## month.lbl^4:as.factor(month)9 NA
## month.lbl^5:as.factor(month)9 NA
## month.lbl^6:as.factor(month)9 NA
## month.lbl^7:as.factor(month)9 NA
## month.lbl^8:as.factor(month)9 NA
## month.lbl^9:as.factor(month)9 NA
## month.lbl^10:as.factor(month)9 NA
## month.lbl^11:as.factor(month)9 NA
## month.lbl.L:as.factor(month)10 NA
## month.lbl.Q:as.factor(month)10 NA
## month.lbl.C:as.factor(month)10 NA
## month.lbl^4:as.factor(month)10 NA
## month.lbl^5:as.factor(month)10 NA
## month.lbl^6:as.factor(month)10 NA
## month.lbl^7:as.factor(month)10 NA
## month.lbl^8:as.factor(month)10 NA
## month.lbl^9:as.factor(month)10 NA
## month.lbl^10:as.factor(month)10 NA
## month.lbl^11:as.factor(month)10 NA
## month.lbl.L:as.factor(month)11 NA
## month.lbl.Q:as.factor(month)11 NA
## month.lbl.C:as.factor(month)11 NA
## month.lbl^4:as.factor(month)11 NA
## month.lbl^5:as.factor(month)11 NA
## month.lbl^6:as.factor(month)11 NA
## month.lbl^7:as.factor(month)11 NA
## month.lbl^8:as.factor(month)11 NA
## month.lbl^9:as.factor(month)11 NA
## month.lbl^10:as.factor(month)11 NA
## month.lbl^11:as.factor(month)11 NA
## month.lbl.L:as.factor(month)12 NA
## month.lbl.Q:as.factor(month)12 NA
## month.lbl.C:as.factor(month)12 NA
## month.lbl^4:as.factor(month)12 NA
## month.lbl^5:as.factor(month)12 NA
## month.lbl^6:as.factor(month)12 NA
## month.lbl^7:as.factor(month)12 NA
## month.lbl^8:as.factor(month)12 NA
## month.lbl^9:as.factor(month)12 NA
## month.lbl^10:as.factor(month)12 NA
## month.lbl^11:as.factor(month)12 NA
## t value
## (Intercept) -5.415
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))1 -5.412
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))2 -5.417
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))3 -5.414
## date NA
## special 1.183
## index.num NA
## year 5.415
## half -3.360
## quarter 2.211
## month 5.420
## month.lbl.L NA
## month.lbl.Q 5.351
## month.lbl.C -4.814
## month.lbl^4 -2.280
## month.lbl^5 3.862
## month.lbl^6 -4.549
## month.lbl^7 0.638
## month.lbl^8 -4.445
## month.lbl^9 NA
## month.lbl^10 -3.907
## month.lbl^11 NA
## as.factor(month)2 NA
## as.factor(month)3 NA
## as.factor(month)4 NA
## as.factor(month)5 NA
## as.factor(month)6 NA
## as.factor(month)7 NA
## as.factor(month)8 NA
## as.factor(month)9 NA
## as.factor(month)10 NA
## as.factor(month)11 NA
## as.factor(month)12 NA
## month.lbl.L:as.factor(month)2 NA
## month.lbl.Q:as.factor(month)2 NA
## month.lbl.C:as.factor(month)2 NA
## month.lbl^4:as.factor(month)2 NA
## month.lbl^5:as.factor(month)2 NA
## month.lbl^6:as.factor(month)2 NA
## month.lbl^7:as.factor(month)2 NA
## month.lbl^8:as.factor(month)2 NA
## month.lbl^9:as.factor(month)2 NA
## month.lbl^10:as.factor(month)2 NA
## month.lbl^11:as.factor(month)2 NA
## month.lbl.L:as.factor(month)3 NA
## month.lbl.Q:as.factor(month)3 NA
## month.lbl.C:as.factor(month)3 NA
## month.lbl^4:as.factor(month)3 NA
## month.lbl^5:as.factor(month)3 NA
## month.lbl^6:as.factor(month)3 NA
## month.lbl^7:as.factor(month)3 NA
## month.lbl^8:as.factor(month)3 NA
## month.lbl^9:as.factor(month)3 NA
## month.lbl^10:as.factor(month)3 NA
## month.lbl^11:as.factor(month)3 NA
## month.lbl.L:as.factor(month)4 NA
## month.lbl.Q:as.factor(month)4 NA
## month.lbl.C:as.factor(month)4 NA
## month.lbl^4:as.factor(month)4 NA
## month.lbl^5:as.factor(month)4 NA
## month.lbl^6:as.factor(month)4 NA
## month.lbl^7:as.factor(month)4 NA
## month.lbl^8:as.factor(month)4 NA
## month.lbl^9:as.factor(month)4 NA
## month.lbl^10:as.factor(month)4 NA
## month.lbl^11:as.factor(month)4 NA
## month.lbl.L:as.factor(month)5 NA
## month.lbl.Q:as.factor(month)5 NA
## month.lbl.C:as.factor(month)5 NA
## month.lbl^4:as.factor(month)5 NA
## month.lbl^5:as.factor(month)5 NA
## month.lbl^6:as.factor(month)5 NA
## month.lbl^7:as.factor(month)5 NA
## month.lbl^8:as.factor(month)5 NA
## month.lbl^9:as.factor(month)5 NA
## month.lbl^10:as.factor(month)5 NA
## month.lbl^11:as.factor(month)5 NA
## month.lbl.L:as.factor(month)6 NA
## month.lbl.Q:as.factor(month)6 NA
## month.lbl.C:as.factor(month)6 NA
## month.lbl^4:as.factor(month)6 NA
## month.lbl^5:as.factor(month)6 NA
## month.lbl^6:as.factor(month)6 NA
## month.lbl^7:as.factor(month)6 NA
## month.lbl^8:as.factor(month)6 NA
## month.lbl^9:as.factor(month)6 NA
## month.lbl^10:as.factor(month)6 NA
## month.lbl^11:as.factor(month)6 NA
## month.lbl.L:as.factor(month)7 NA
## month.lbl.Q:as.factor(month)7 NA
## month.lbl.C:as.factor(month)7 NA
## month.lbl^4:as.factor(month)7 NA
## month.lbl^5:as.factor(month)7 NA
## month.lbl^6:as.factor(month)7 NA
## month.lbl^7:as.factor(month)7 NA
## month.lbl^8:as.factor(month)7 NA
## month.lbl^9:as.factor(month)7 NA
## month.lbl^10:as.factor(month)7 NA
## month.lbl^11:as.factor(month)7 NA
## month.lbl.L:as.factor(month)8 NA
## month.lbl.Q:as.factor(month)8 NA
## month.lbl.C:as.factor(month)8 NA
## month.lbl^4:as.factor(month)8 NA
## month.lbl^5:as.factor(month)8 NA
## month.lbl^6:as.factor(month)8 NA
## month.lbl^7:as.factor(month)8 NA
## month.lbl^8:as.factor(month)8 NA
## month.lbl^9:as.factor(month)8 NA
## month.lbl^10:as.factor(month)8 NA
## month.lbl^11:as.factor(month)8 NA
## month.lbl.L:as.factor(month)9 NA
## month.lbl.Q:as.factor(month)9 NA
## month.lbl.C:as.factor(month)9 NA
## month.lbl^4:as.factor(month)9 NA
## month.lbl^5:as.factor(month)9 NA
## month.lbl^6:as.factor(month)9 NA
## month.lbl^7:as.factor(month)9 NA
## month.lbl^8:as.factor(month)9 NA
## month.lbl^9:as.factor(month)9 NA
## month.lbl^10:as.factor(month)9 NA
## month.lbl^11:as.factor(month)9 NA
## month.lbl.L:as.factor(month)10 NA
## month.lbl.Q:as.factor(month)10 NA
## month.lbl.C:as.factor(month)10 NA
## month.lbl^4:as.factor(month)10 NA
## month.lbl^5:as.factor(month)10 NA
## month.lbl^6:as.factor(month)10 NA
## month.lbl^7:as.factor(month)10 NA
## month.lbl^8:as.factor(month)10 NA
## month.lbl^9:as.factor(month)10 NA
## month.lbl^10:as.factor(month)10 NA
## month.lbl^11:as.factor(month)10 NA
## month.lbl.L:as.factor(month)11 NA
## month.lbl.Q:as.factor(month)11 NA
## month.lbl.C:as.factor(month)11 NA
## month.lbl^4:as.factor(month)11 NA
## month.lbl^5:as.factor(month)11 NA
## month.lbl^6:as.factor(month)11 NA
## month.lbl^7:as.factor(month)11 NA
## month.lbl^8:as.factor(month)11 NA
## month.lbl^9:as.factor(month)11 NA
## month.lbl^10:as.factor(month)11 NA
## month.lbl^11:as.factor(month)11 NA
## month.lbl.L:as.factor(month)12 NA
## month.lbl.Q:as.factor(month)12 NA
## month.lbl.C:as.factor(month)12 NA
## month.lbl^4:as.factor(month)12 NA
## month.lbl^5:as.factor(month)12 NA
## month.lbl^6:as.factor(month)12 NA
## month.lbl^7:as.factor(month)12 NA
## month.lbl^8:as.factor(month)12 NA
## month.lbl^9:as.factor(month)12 NA
## month.lbl^10:as.factor(month)12 NA
## month.lbl^11:as.factor(month)12 NA
## Pr(>|t|)
## (Intercept) 1.28e-05
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))1 1.29e-05
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))2 1.27e-05
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))3 1.28e-05
## date NA
## special 0.248015
## index.num NA
## year 1.28e-05
## half 0.002503
## quarter 0.036420
## month 1.26e-05
## month.lbl.L NA
## month.lbl.Q 1.51e-05
## month.lbl.C 6.04e-05
## month.lbl^4 0.031378
## month.lbl^5 0.000706
## month.lbl^6 0.000120
## month.lbl^7 0.529282
## month.lbl^8 0.000157
## month.lbl^9 NA
## month.lbl^10 0.000630
## month.lbl^11 NA
## as.factor(month)2 NA
## as.factor(month)3 NA
## as.factor(month)4 NA
## as.factor(month)5 NA
## as.factor(month)6 NA
## as.factor(month)7 NA
## as.factor(month)8 NA
## as.factor(month)9 NA
## as.factor(month)10 NA
## as.factor(month)11 NA
## as.factor(month)12 NA
## month.lbl.L:as.factor(month)2 NA
## month.lbl.Q:as.factor(month)2 NA
## month.lbl.C:as.factor(month)2 NA
## month.lbl^4:as.factor(month)2 NA
## month.lbl^5:as.factor(month)2 NA
## month.lbl^6:as.factor(month)2 NA
## month.lbl^7:as.factor(month)2 NA
## month.lbl^8:as.factor(month)2 NA
## month.lbl^9:as.factor(month)2 NA
## month.lbl^10:as.factor(month)2 NA
## month.lbl^11:as.factor(month)2 NA
## month.lbl.L:as.factor(month)3 NA
## month.lbl.Q:as.factor(month)3 NA
## month.lbl.C:as.factor(month)3 NA
## month.lbl^4:as.factor(month)3 NA
## month.lbl^5:as.factor(month)3 NA
## month.lbl^6:as.factor(month)3 NA
## month.lbl^7:as.factor(month)3 NA
## month.lbl^8:as.factor(month)3 NA
## month.lbl^9:as.factor(month)3 NA
## month.lbl^10:as.factor(month)3 NA
## month.lbl^11:as.factor(month)3 NA
## month.lbl.L:as.factor(month)4 NA
## month.lbl.Q:as.factor(month)4 NA
## month.lbl.C:as.factor(month)4 NA
## month.lbl^4:as.factor(month)4 NA
## month.lbl^5:as.factor(month)4 NA
## month.lbl^6:as.factor(month)4 NA
## month.lbl^7:as.factor(month)4 NA
## month.lbl^8:as.factor(month)4 NA
## month.lbl^9:as.factor(month)4 NA
## month.lbl^10:as.factor(month)4 NA
## month.lbl^11:as.factor(month)4 NA
## month.lbl.L:as.factor(month)5 NA
## month.lbl.Q:as.factor(month)5 NA
## month.lbl.C:as.factor(month)5 NA
## month.lbl^4:as.factor(month)5 NA
## month.lbl^5:as.factor(month)5 NA
## month.lbl^6:as.factor(month)5 NA
## month.lbl^7:as.factor(month)5 NA
## month.lbl^8:as.factor(month)5 NA
## month.lbl^9:as.factor(month)5 NA
## month.lbl^10:as.factor(month)5 NA
## month.lbl^11:as.factor(month)5 NA
## month.lbl.L:as.factor(month)6 NA
## month.lbl.Q:as.factor(month)6 NA
## month.lbl.C:as.factor(month)6 NA
## month.lbl^4:as.factor(month)6 NA
## month.lbl^5:as.factor(month)6 NA
## month.lbl^6:as.factor(month)6 NA
## month.lbl^7:as.factor(month)6 NA
## month.lbl^8:as.factor(month)6 NA
## month.lbl^9:as.factor(month)6 NA
## month.lbl^10:as.factor(month)6 NA
## month.lbl^11:as.factor(month)6 NA
## month.lbl.L:as.factor(month)7 NA
## month.lbl.Q:as.factor(month)7 NA
## month.lbl.C:as.factor(month)7 NA
## month.lbl^4:as.factor(month)7 NA
## month.lbl^5:as.factor(month)7 NA
## month.lbl^6:as.factor(month)7 NA
## month.lbl^7:as.factor(month)7 NA
## month.lbl^8:as.factor(month)7 NA
## month.lbl^9:as.factor(month)7 NA
## month.lbl^10:as.factor(month)7 NA
## month.lbl^11:as.factor(month)7 NA
## month.lbl.L:as.factor(month)8 NA
## month.lbl.Q:as.factor(month)8 NA
## month.lbl.C:as.factor(month)8 NA
## month.lbl^4:as.factor(month)8 NA
## month.lbl^5:as.factor(month)8 NA
## month.lbl^6:as.factor(month)8 NA
## month.lbl^7:as.factor(month)8 NA
## month.lbl^8:as.factor(month)8 NA
## month.lbl^9:as.factor(month)8 NA
## month.lbl^10:as.factor(month)8 NA
## month.lbl^11:as.factor(month)8 NA
## month.lbl.L:as.factor(month)9 NA
## month.lbl.Q:as.factor(month)9 NA
## month.lbl.C:as.factor(month)9 NA
## month.lbl^4:as.factor(month)9 NA
## month.lbl^5:as.factor(month)9 NA
## month.lbl^6:as.factor(month)9 NA
## month.lbl^7:as.factor(month)9 NA
## month.lbl^8:as.factor(month)9 NA
## month.lbl^9:as.factor(month)9 NA
## month.lbl^10:as.factor(month)9 NA
## month.lbl^11:as.factor(month)9 NA
## month.lbl.L:as.factor(month)10 NA
## month.lbl.Q:as.factor(month)10 NA
## month.lbl.C:as.factor(month)10 NA
## month.lbl^4:as.factor(month)10 NA
## month.lbl^5:as.factor(month)10 NA
## month.lbl^6:as.factor(month)10 NA
## month.lbl^7:as.factor(month)10 NA
## month.lbl^8:as.factor(month)10 NA
## month.lbl^9:as.factor(month)10 NA
## month.lbl^10:as.factor(month)10 NA
## month.lbl^11:as.factor(month)10 NA
## month.lbl.L:as.factor(month)11 NA
## month.lbl.Q:as.factor(month)11 NA
## month.lbl.C:as.factor(month)11 NA
## month.lbl^4:as.factor(month)11 NA
## month.lbl^5:as.factor(month)11 NA
## month.lbl^6:as.factor(month)11 NA
## month.lbl^7:as.factor(month)11 NA
## month.lbl^8:as.factor(month)11 NA
## month.lbl^9:as.factor(month)11 NA
## month.lbl^10:as.factor(month)11 NA
## month.lbl^11:as.factor(month)11 NA
## month.lbl.L:as.factor(month)12 NA
## month.lbl.Q:as.factor(month)12 NA
## month.lbl.C:as.factor(month)12 NA
## month.lbl^4:as.factor(month)12 NA
## month.lbl^5:as.factor(month)12 NA
## month.lbl^6:as.factor(month)12 NA
## month.lbl^7:as.factor(month)12 NA
## month.lbl^8:as.factor(month)12 NA
## month.lbl^9:as.factor(month)12 NA
## month.lbl^10:as.factor(month)12 NA
## month.lbl^11:as.factor(month)12 NA
##
## (Intercept) ***
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))1 ***
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))2 ***
## splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.4)))3 ***
## date
## special
## index.num
## year ***
## half **
## quarter *
## month ***
## month.lbl.L
## month.lbl.Q ***
## month.lbl.C ***
## month.lbl^4 *
## month.lbl^5 ***
## month.lbl^6 ***
## month.lbl^7
## month.lbl^8 ***
## month.lbl^9
## month.lbl^10 ***
## month.lbl^11
## as.factor(month)2
## as.factor(month)3
## as.factor(month)4
## as.factor(month)5
## as.factor(month)6
## as.factor(month)7
## as.factor(month)8
## as.factor(month)9
## as.factor(month)10
## as.factor(month)11
## as.factor(month)12
## month.lbl.L:as.factor(month)2
## month.lbl.Q:as.factor(month)2
## month.lbl.C:as.factor(month)2
## month.lbl^4:as.factor(month)2
## month.lbl^5:as.factor(month)2
## month.lbl^6:as.factor(month)2
## month.lbl^7:as.factor(month)2
## month.lbl^8:as.factor(month)2
## month.lbl^9:as.factor(month)2
## month.lbl^10:as.factor(month)2
## month.lbl^11:as.factor(month)2
## month.lbl.L:as.factor(month)3
## month.lbl.Q:as.factor(month)3
## month.lbl.C:as.factor(month)3
## month.lbl^4:as.factor(month)3
## month.lbl^5:as.factor(month)3
## month.lbl^6:as.factor(month)3
## month.lbl^7:as.factor(month)3
## month.lbl^8:as.factor(month)3
## month.lbl^9:as.factor(month)3
## month.lbl^10:as.factor(month)3
## month.lbl^11:as.factor(month)3
## month.lbl.L:as.factor(month)4
## month.lbl.Q:as.factor(month)4
## month.lbl.C:as.factor(month)4
## month.lbl^4:as.factor(month)4
## month.lbl^5:as.factor(month)4
## month.lbl^6:as.factor(month)4
## month.lbl^7:as.factor(month)4
## month.lbl^8:as.factor(month)4
## month.lbl^9:as.factor(month)4
## month.lbl^10:as.factor(month)4
## month.lbl^11:as.factor(month)4
## month.lbl.L:as.factor(month)5
## month.lbl.Q:as.factor(month)5
## month.lbl.C:as.factor(month)5
## month.lbl^4:as.factor(month)5
## month.lbl^5:as.factor(month)5
## month.lbl^6:as.factor(month)5
## month.lbl^7:as.factor(month)5
## month.lbl^8:as.factor(month)5
## month.lbl^9:as.factor(month)5
## month.lbl^10:as.factor(month)5
## month.lbl^11:as.factor(month)5
## month.lbl.L:as.factor(month)6
## month.lbl.Q:as.factor(month)6
## month.lbl.C:as.factor(month)6
## month.lbl^4:as.factor(month)6
## month.lbl^5:as.factor(month)6
## month.lbl^6:as.factor(month)6
## month.lbl^7:as.factor(month)6
## month.lbl^8:as.factor(month)6
## month.lbl^9:as.factor(month)6
## month.lbl^10:as.factor(month)6
## month.lbl^11:as.factor(month)6
## month.lbl.L:as.factor(month)7
## month.lbl.Q:as.factor(month)7
## month.lbl.C:as.factor(month)7
## month.lbl^4:as.factor(month)7
## month.lbl^5:as.factor(month)7
## month.lbl^6:as.factor(month)7
## month.lbl^7:as.factor(month)7
## month.lbl^8:as.factor(month)7
## month.lbl^9:as.factor(month)7
## month.lbl^10:as.factor(month)7
## month.lbl^11:as.factor(month)7
## month.lbl.L:as.factor(month)8
## month.lbl.Q:as.factor(month)8
## month.lbl.C:as.factor(month)8
## month.lbl^4:as.factor(month)8
## month.lbl^5:as.factor(month)8
## month.lbl^6:as.factor(month)8
## month.lbl^7:as.factor(month)8
## month.lbl^8:as.factor(month)8
## month.lbl^9:as.factor(month)8
## month.lbl^10:as.factor(month)8
## month.lbl^11:as.factor(month)8
## month.lbl.L:as.factor(month)9
## month.lbl.Q:as.factor(month)9
## month.lbl.C:as.factor(month)9
## month.lbl^4:as.factor(month)9
## month.lbl^5:as.factor(month)9
## month.lbl^6:as.factor(month)9
## month.lbl^7:as.factor(month)9
## month.lbl^8:as.factor(month)9
## month.lbl^9:as.factor(month)9
## month.lbl^10:as.factor(month)9
## month.lbl^11:as.factor(month)9
## month.lbl.L:as.factor(month)10
## month.lbl.Q:as.factor(month)10
## month.lbl.C:as.factor(month)10
## month.lbl^4:as.factor(month)10
## month.lbl^5:as.factor(month)10
## month.lbl^6:as.factor(month)10
## month.lbl^7:as.factor(month)10
## month.lbl^8:as.factor(month)10
## month.lbl^9:as.factor(month)10
## month.lbl^10:as.factor(month)10
## month.lbl^11:as.factor(month)10
## month.lbl.L:as.factor(month)11
## month.lbl.Q:as.factor(month)11
## month.lbl.C:as.factor(month)11
## month.lbl^4:as.factor(month)11
## month.lbl^5:as.factor(month)11
## month.lbl^6:as.factor(month)11
## month.lbl^7:as.factor(month)11
## month.lbl^8:as.factor(month)11
## month.lbl^9:as.factor(month)11
## month.lbl^10:as.factor(month)11
## month.lbl^11:as.factor(month)11
## month.lbl.L:as.factor(month)12
## month.lbl.Q:as.factor(month)12
## month.lbl.C:as.factor(month)12
## month.lbl^4:as.factor(month)12
## month.lbl^5:as.factor(month)12
## month.lbl^6:as.factor(month)12
## month.lbl^7:as.factor(month)12
## month.lbl^8:as.factor(month)12
## month.lbl^9:as.factor(month)12
## month.lbl^10:as.factor(month)12
## month.lbl^11:as.factor(month)12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2799 on 25 degrees of freedom
## Multiple R-squared: 0.7113, Adjusted R-squared: 0.5265
## F-statistic: 3.85 on 16 and 25 DF, p-value: 0.001301
Explore trend & seasonality:
# Decompose change_yoy with seasonal periods of 3, 6 and 12 observations
# and plot the resulting components.
autoplot(
  mstl(
    msts(data$change_yoy, seasonal.periods = c(3, 6, 12))
  )
)
Transform the data and prepare it before splitting it:
# Parameters of the log-interval transformation applied to change_yoy.
limit_lower <- 0
limit_upper <- 116
offset <- 27

# Standardization parameters. These must equal the values reported by
# standardize_vec() in the "Standardization Parameters" message of the
# pipeline below; the previously hard-coded numbers did not match that
# message. Presumably they are reused later to invert the standardization,
# in which case stale values would bias every back-transformed forecast.
std_mean <- -0.90525668945564
std_sd <- 0.598941539811406

# Forecast horizon, lag length and rolling-window sizes (in observations).
horizon <- 3
lag_period <- 3
rolling_periods <- c(1, 3, 12)

# Build the modelling target:
#   1. log-interval transform change_yoy, then standardize it;
#   2. compute an outlier-cleaned copy of the transformed series;
#   3. use the cleaned values only inside the two Mar-May windows of 2020
#      and 2021, keeping the transformed values everywhere else;
#   4. drop the raw and helper columns.
data_transformed_tbl <- data %>%
  mutate(
    trans_change_yoy = log_interval_vec(
      change_yoy,
      limit_lower = limit_lower,
      limit_upper = limit_upper,
      offset = offset
    ) %>%
      standardize_vec()
  ) %>%
  mutate(cleaned = ts_clean_vec(trans_change_yoy, period = 3)) %>%
  mutate(trans_change_yoy = ifelse(
    date %>% between_time("2020-03-01", "2020-05-01") |
      date %>% between_time("2021-03-01", "2021-05-01"),
    cleaned,
    trans_change_yoy
  )) %>%
  select(-change_yoy, -cleaned)
## log_interval_vec():
## Using limit_lower: 0
## Using limit_upper: 116
## Using offset: 27
## Standardization Parameters
## mean: -0.90525668945564
## standard deviation: 0.598941539811406
# Extend the transformed series by `horizon` future rows and add lag and
# rolling-mean features built from the lagged target.
data_prepared_full_tbl <- data_transformed_tbl %>%
  # Append empty future rows so the lagged features exist for the forecast.
  bind_rows(
    future_frame(.data = ., .date_var = date, .length_out = horizon)
  ) %>%
  tk_augment_lags(trans_change_yoy, .lags = lag_period) %>%
  tk_augment_slidify(
    .value = trans_change_yoy_lag3,
    .f = mean,
    .period = rolling_periods,
    # Use a trailing window. A centered window looks ahead on the lagged
    # series; for the 12-period window that reaches past the 3-period lag
    # and pulls the current and future target values into the feature
    # (look-ahead leakage).
    .align = "right",
    .partial = TRUE
  ) %>%
  # NOTE(review): rows without a match in special_dates_tbl (e.g. the
  # appended future rows) get special = NA - confirm this is handled later.
  left_join(special_dates_tbl, by = c("date")) %>%
  # change_yoy was already dropped from data_transformed_tbl; presumably it
  # is re-introduced by the join with special_dates_tbl - TODO confirm.
  select(-change_yoy)
# Plot every column of the prepared table as its own series (long format,
# one colour per column name, no smoother).
plot_time_series(
  pivot_longer(data_prepared_full_tbl, -date),
  date, value, name,
  .smooth = FALSE
)
# Show the last observed row together with the appended future rows.
tail(data_prepared_full_tbl, horizon + 1)
## # A tibble: 4 × 7
## date trans_change_yoy trans_change_yoy…¹ trans…² trans…³ trans…⁴ special
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2022-08-01 0.128 0.0606 0.0606 0.0227 0.155 0
## 2 2022-09-01 NA 0.0631 0.0631 0.0118 0.102 NA
## 3 2022-10-01 NA -0.0883 -0.0883 0.0342 0.0945 NA
## 4 2022-11-01 NA 0.128 0.128 0.0198 0.0111 NA
## # … with abbreviated variable names ¹trans_change_yoy_lag3,
## # ²trans_change_yoy_lag3_roll_1, ³trans_change_yoy_lag3_roll_3,
## # ⁴trans_change_yoy_lag3_roll_12
Preparing a time series validation strategy:
# Rows with an observed target: the data available for modelling.
data_prepared_tbl <- filter(
  data_prepared_full_tbl,
  !is.na(trans_change_yoy)
)
data_prepared_tbl
## # A tibble: 42 × 7
## date trans_change_yoy trans_change_yo…¹ trans…² trans…³ trans…⁴ special
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019-03-01 -0.577 NA NA NA NA 0
## 2 2019-04-01 -0.200 NA NA NA NA 0
## 3 2019-05-01 -0.473 NA NA NA NA 0
## 4 2019-06-01 -0.564 -0.577 -0.577 NA NA 0
## 5 2019-07-01 -0.246 -0.200 -0.200 -0.417 NA 0
## 6 2019-08-01 -0.320 -0.473 -0.473 -0.412 NA 0
## 7 2019-09-01 -0.354 -0.564 -0.564 -0.427 NA 0
## 8 2019-10-01 -0.330 -0.246 -0.246 -0.377 NA 0
## 9 2019-11-01 -0.385 -0.320 -0.320 -0.307 -0.316 0
## 10 2019-12-01 -0.258 -0.354 -0.354 -0.335 -0.378 0
## # … with 32 more rows, and abbreviated variable names ¹trans_change_yoy_lag3,
## # ²trans_change_yoy_lag3_roll_1, ³trans_change_yoy_lag3_roll_3,
## # ⁴trans_change_yoy_lag3_roll_12
# Rows without a target: the future rows to be forecast.
forecast_tbl <- filter(
  data_prepared_full_tbl,
  is.na(trans_change_yoy)
)
forecast_tbl
## # A tibble: 3 × 7
## date trans_change_yoy trans_change_yoy…¹ trans…² trans…³ trans…⁴ special
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2022-09-01 NA 0.0631 0.0631 0.0118 0.102 NA
## 2 2022-10-01 NA -0.0883 -0.0883 0.0342 0.0945 NA
## 3 2022-11-01 NA 0.128 0.128 0.0198 0.0111 NA
## # … with abbreviated variable names ¹trans_change_yoy_lag3,
## # ²trans_change_yoy_lag3_roll_1, ³trans_change_yoy_lag3_roll_3,
## # ⁴trans_change_yoy_lag3_roll_12
# Train/test split: the final 3 months are held out for assessment and all
# earlier rows form the (cumulative) training set.
splits <- time_series_split(
  data_prepared_tbl,
  date_var = date,
  assess = "3 months",
  cumulative = TRUE
)
# Visualise the initial split as an interactive plotly chart.
p <- plot_time_series_cv_plan(
  tk_time_series_cv_plan(splits),
  date, trans_change_yoy,
  .interactive = FALSE
) +
  labs(title = "Sequential Time Series Initial Split")
ggplotly(p)
Preparing the time series cross-validation resamples:
# Time series cross-validation on the training set: cumulative training
# windows, 3-month assessment windows, at most 9 slices.
resamples_tscv_lag <- training(splits) %>%
  time_series_cv(
    date_var = date,
    cumulative = TRUE,
    initial = "12 months",
    assess = "3 months",
    slice_limit = 9
  )
# Visualise every time series CV slice in its own facet.
p <- plot_time_series_cv_plan(
  tk_time_series_cv_plan(resamples_tscv_lag),
  date, trans_change_yoy,
  .interactive = FALSE
) +
  facet_wrap(".id") +
  labs(title = "Sequential Time Series Cross-Validation")
ggplotly(p)
# Random 9-fold cross-validation on the training set; the seed makes the
# fold assignment reproducible.
set.seed(123)
resamples_kfold <- training(splits) %>%
  vfold_cv(v = 9)
# Visualise the k-fold resamples, one facet per fold. The title names the
# resampling scheme actually shown (random k-fold, not sequential
# time series CV).
p <- resamples_kfold %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(date, trans_change_yoy, .interactive = FALSE) +
  facet_wrap(c(".id")) +
  labs(title = "K-Fold Cross-Validation")
ggplotly(p)
Save the data:
# Persist the prepared data, forecast rows, split and both resample sets
# in a single rds file.
write_rds(
  list(
    data_prepared_tbl = data_prepared_tbl,
    forecast_tbl = forecast_tbl,
    splits = splits,
    resamples_tscv_lag = resamples_tscv_lag,
    resamples_kfold = resamples_kfold
  ),
  "data/data.rds"
)
Calendar features:
# Calendar-feature recipe fitted on the training split.
recipe_calendar_spec <- recipe(trans_change_yoy ~ ., data = training(splits)) %>%
  # Expand `date` into calendar signature columns.
  step_timeseries_signature(date) %>%
  # Drop signature columns finer than a month, plus the .iso/.xts variants.
  step_rm(
    ends_with(".iso"),
    ends_with(".xts"),
    contains("am.pm"),
    contains("second"),
    contains("minute"),
    contains("hour"),
    contains("day"),
    contains("week")
  ) %>%
  step_normalize(
    ends_with("index.num"), ends_with("_year")
  ) %>%
  step_dummy(all_nominal()) %>%
  # NOTE(review): matches("month") also selects the month.lbl columns, so
  # this creates month.lbl x month.lbl interactions as well - confirm that
  # is intended.
  step_interact(~ matches("month") * matches("month.lbl")) %>%
  # Fourier terms for 3- and 12-observation periods. Period 1 was removed:
  # its K = 1 pair is identical to the period-3 / K = 3 pair (both have
  # frequency 1 per observation), and its higher harmonics oscillate faster
  # than the sampling interval, so it only added duplicated/aliased columns.
  step_fourier(date, period = c(3, 12), K = 3)
# Inspect the columns produced by the prepared recipe.
recipe_calendar_spec %>% prep() %>% juice() %>% glimpse()
## Rows: 39
## Columns: 107
## $ date <date> 2019-03-01, 2019-04-01, 2019-05…
## $ trans_change_yoy_lag3 <dbl> NA, NA, NA, -0.57704392, -0.2003…
## $ trans_change_yoy_lag3_roll_1 <dbl> NA, NA, NA, -0.57704392, -0.2003…
## $ trans_change_yoy_lag3_roll_3 <dbl> NA, NA, NA, NA, -0.4167281, -0.4…
## $ trans_change_yoy_lag3_roll_12 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, …
## $ special <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ trans_change_yoy <dbl> -0.57704392, -0.20033185, -0.472…
## $ date_index.num <dbl> -1.669024460, -1.579754836, -1.4…
## $ date_year <dbl> -1.3024299, -1.3024299, -1.30242…
## $ date_half <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1,…
## $ date_quarter <int> 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1,…
## $ date_month <int> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ date_month.lbl_01 <dbl> -0.2926847, -0.2090605, -0.12543…
## $ date_month.lbl_02 <dbl> 0.009124148, -0.155110522, -0.26…
## $ date_month.lbl_03 <dbl> 0.29268470, 0.34843417, 0.264809…
## $ date_month.lbl_04 <dbl> -0.3687669, -0.1452718, 0.134097…
## $ date_month.lbl_05 <dbl> 0.1664780, -0.2298982, -0.348811…
## $ date_month.lbl_06 <dbl> 0.16419739, 0.37317590, 0.059708…
## $ date_month.lbl_07 <dbl> -0.41291385, -0.13654123, 0.3355…
## $ date_month.lbl_08 <dbl> 0.46601138, -0.25454403, -0.2897…
## $ date_month.lbl_09 <dbl> -0.35520473, 0.47412790, -0.1596…
## $ date_month.lbl_10 <dbl> 0.190963389, -0.409207262, 0.491…
## $ date_month.lbl_11 <dbl> -0.065483987, 0.196451960, -0.39…
## $ date_month_x_date_month.lbl_01 <dbl> -0.8780541, -0.8362420, -0.62718…
## $ date_month_x_date_month.lbl_02 <dbl> 0.02737245, -0.62044209, -1.3230…
## $ date_month_x_date_month.lbl_03 <dbl> 0.8780541, 1.3937367, 1.3240498,…
## $ date_month_x_date_month.lbl_04 <dbl> -1.1063006, -0.5810872, 0.670485…
## $ date_month_x_date_month.lbl_05 <dbl> 0.4994341, -0.9195929, -1.744055…
## $ date_month_x_date_month.lbl_06 <dbl> 0.4925922, 1.4927036, 0.2985407,…
## $ date_month_x_date_month.lbl_07 <dbl> -1.23874154, -0.54616493, 1.6779…
## $ date_month_x_date_month.lbl_08 <dbl> 1.39803414, -1.01817612, -1.4489…
## $ date_month_x_date_month.lbl_09 <dbl> -1.06561418, 1.89651158, -0.7980…
## $ date_month_x_date_month.lbl_10 <dbl> 0.572890167, -1.636829049, 2.455…
## $ date_month_x_date_month.lbl_11 <dbl> -0.196451960, 0.785807840, -1.96…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> -0.002670499, 0.032427484, 0.033…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.085664336, -0.072843823, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> 0.10793242, 0.03037060, -0.01682…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.048725571, 0.048062638, 0.043…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.048058066, -0.078016340, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> 0.120853567, 0.028545379, -0.042…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.13639440, 0.05321510, 0.03635…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> 0.103962990, -0.099121416, 0.020…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.055892063, 0.085549076, -0.06…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> 0.0191661612, -0.0410703455, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> 0.002670499, -0.054045806, -0.07…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.003364684, 0.022533185, -0.03…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> 0.00151897, 0.03565963, 0.092295…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> 0.001498161, -0.057883508, -0.01…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.003767487, 0.021178982, -0.08…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> 0.004251957, 0.039482457, 0.0766…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.003240941, -0.073542225, 0.04…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> 0.001742378, 0.063472352, -0.129…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0005974856, -0.0304717661, 0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> -0.10793242, -0.05061766, 0.0355…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.04872557, -0.08010440, -0.0923…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.04805807, 0.13002723, 0.015811…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> -0.12085357, -0.04757563, 0.0888…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.136394402, -0.088691838, -0.07…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> -0.103962990, 0.165202360, -0.04…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.055892063, -0.142581793, 0.130…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> -0.0191661612, 0.0684505758, -0.…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> -0.06139158, 0.03339773, -0.0467…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> -0.060550561, -0.054211934, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.15226895, 0.01983559, 0.045002…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> -0.17184956, 0.03697807, -0.0388…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.130987739, -0.068877413, -0.02…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> -0.070420973, 0.059446276, 0.065…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0241483254, -0.0285389300, -0.…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.02733526, -0.08579248, -0.0208…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> -0.06874108, 0.03139059, -0.1170…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.07758065, 0.05851922, 0.101081…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> -0.05913378, -0.10900116, 0.0556…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.03179121, 0.09407602, -0.17128…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> -0.0109016446, -0.0451639563, 0.…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> -0.06779938, -0.05095390, 0.0200…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.076517854, -0.094989697, -0.01…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> -0.058323691, 0.176933102, -0.00…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0313556909, -0.1527062868, 0.0…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> -0.0107523000, 0.0733111362, -0.…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> -0.192422552, 0.034755756, -0.09…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.146668951, -0.064738007, -0.05…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> -0.0788514278, 0.0558736640, 0.1…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0270392449, -0.0268237928, -0.…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> -0.1655294447, -0.1206864256, 0.…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0889911123, 0.1041612659, -0.1…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> -3.051628e-02, -5.000567e-02, 1.…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> -6.783110e-02, -1.940166e-01, -7…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 2.326022e-02, 9.314335e-02, 6.27…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> -1.250504e-02, -8.038957e-02, -1…
## $ date_sin1_K1 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos1_K1 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin1_K2 <dbl> 2.993631e-01, 2.993631e-01, 6.51…
## $ date_cos1_K2 <dbl> -0.95413926, -0.95413926, -0.758…
## $ date_sin1_K3 <dbl> -8.978045e-01, -8.978045e-01, -4…
## $ date_cos1_K3 <dbl> -0.4403942, -0.4403942, -0.87434…
## $ date_sin3_K1 <dbl> 4.554948e-01, 5.432217e-01, -9.9…
## $ date_cos3_K1 <dbl> 0.8902385, -0.8395893, -0.117956…
## $ date_sin3_K2 <dbl> 8.109979e-01, -9.121663e-01, 2.3…
## $ date_cos3_K2 <dbl> 0.58504900, 0.40982032, -0.97217…
## $ date_sin3_K3 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos3_K3 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin12_K1 <dbl> 9.930187e-01, 8.010011e-01, 4.09…
## $ date_cos12_K1 <dbl> -0.11795672, -0.59866288, -0.912…
## $ date_sin12_K2 <dbl> -2.342665e-01, -9.590592e-01, -7…
## $ date_cos12_K2 <dbl> -0.9721724, -0.2832055, 0.664094…
## $ date_sin12_K3 <dbl> -9.377521e-01, 3.473053e-01, 9.5…
## $ date_cos12_K3 <dbl> 3.473053e-01, 9.377521e-01, -2.9…
# Variant without any lag-derived predictors.
recipe_calendar_spec_nolag <- step_rm(
  recipe_calendar_spec,
  contains("_lag")
)
# Variant that keeps the lag-derived predictors and omits rows where they
# are missing.
recipe_calendar_spec_lag <- step_naomit(
  recipe_calendar_spec,
  contains("_lag")
)
# Inspect the columns of the no-lag recipe.
glimpse(juice(prep(recipe_calendar_spec_nolag)))
## Rows: 39
## Columns: 103
## $ date <date> 2019-03-01, 2019-04-01, 2019-05…
## $ special <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ trans_change_yoy <dbl> -0.57704392, -0.20033185, -0.472…
## $ date_index.num <dbl> -1.669024460, -1.579754836, -1.4…
## $ date_year <dbl> -1.3024299, -1.3024299, -1.30242…
## $ date_half <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1,…
## $ date_quarter <int> 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1,…
## $ date_month <int> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ date_month.lbl_01 <dbl> -0.2926847, -0.2090605, -0.12543…
## $ date_month.lbl_02 <dbl> 0.009124148, -0.155110522, -0.26…
## $ date_month.lbl_03 <dbl> 0.29268470, 0.34843417, 0.264809…
## $ date_month.lbl_04 <dbl> -0.3687669, -0.1452718, 0.134097…
## $ date_month.lbl_05 <dbl> 0.1664780, -0.2298982, -0.348811…
## $ date_month.lbl_06 <dbl> 0.16419739, 0.37317590, 0.059708…
## $ date_month.lbl_07 <dbl> -0.41291385, -0.13654123, 0.3355…
## $ date_month.lbl_08 <dbl> 0.46601138, -0.25454403, -0.2897…
## $ date_month.lbl_09 <dbl> -0.35520473, 0.47412790, -0.1596…
## $ date_month.lbl_10 <dbl> 0.190963389, -0.409207262, 0.491…
## $ date_month.lbl_11 <dbl> -0.065483987, 0.196451960, -0.39…
## $ date_month_x_date_month.lbl_01 <dbl> -0.8780541, -0.8362420, -0.62718…
## $ date_month_x_date_month.lbl_02 <dbl> 0.02737245, -0.62044209, -1.3230…
## $ date_month_x_date_month.lbl_03 <dbl> 0.8780541, 1.3937367, 1.3240498,…
## $ date_month_x_date_month.lbl_04 <dbl> -1.1063006, -0.5810872, 0.670485…
## $ date_month_x_date_month.lbl_05 <dbl> 0.4994341, -0.9195929, -1.744055…
## $ date_month_x_date_month.lbl_06 <dbl> 0.4925922, 1.4927036, 0.2985407,…
## $ date_month_x_date_month.lbl_07 <dbl> -1.23874154, -0.54616493, 1.6779…
## $ date_month_x_date_month.lbl_08 <dbl> 1.39803414, -1.01817612, -1.4489…
## $ date_month_x_date_month.lbl_09 <dbl> -1.06561418, 1.89651158, -0.7980…
## $ date_month_x_date_month.lbl_10 <dbl> 0.572890167, -1.636829049, 2.455…
## $ date_month_x_date_month.lbl_11 <dbl> -0.196451960, 0.785807840, -1.96…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> -0.002670499, 0.032427484, 0.033…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.085664336, -0.072843823, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> 0.10793242, 0.03037060, -0.01682…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.048725571, 0.048062638, 0.043…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.048058066, -0.078016340, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> 0.120853567, 0.028545379, -0.042…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.13639440, 0.05321510, 0.03635…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> 0.103962990, -0.099121416, 0.020…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.055892063, 0.085549076, -0.06…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> 0.0191661612, -0.0410703455, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> 0.002670499, -0.054045806, -0.07…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.003364684, 0.022533185, -0.03…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> 0.00151897, 0.03565963, 0.092295…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> 0.001498161, -0.057883508, -0.01…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.003767487, 0.021178982, -0.08…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> 0.004251957, 0.039482457, 0.0766…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.003240941, -0.073542225, 0.04…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> 0.001742378, 0.063472352, -0.129…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0005974856, -0.0304717661, 0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> -0.10793242, -0.05061766, 0.0355…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.04872557, -0.08010440, -0.0923…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.04805807, 0.13002723, 0.015811…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> -0.12085357, -0.04757563, 0.0888…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.136394402, -0.088691838, -0.07…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> -0.103962990, 0.165202360, -0.04…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.055892063, -0.142581793, 0.130…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> -0.0191661612, 0.0684505758, -0.…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> -0.06139158, 0.03339773, -0.0467…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> -0.060550561, -0.054211934, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.15226895, 0.01983559, 0.045002…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> -0.17184956, 0.03697807, -0.0388…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.130987739, -0.068877413, -0.02…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> -0.070420973, 0.059446276, 0.065…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0241483254, -0.0285389300, -0.…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.02733526, -0.08579248, -0.0208…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> -0.06874108, 0.03139059, -0.1170…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.07758065, 0.05851922, 0.101081…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> -0.05913378, -0.10900116, 0.0556…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.03179121, 0.09407602, -0.17128…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> -0.0109016446, -0.0451639563, 0.…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> -0.06779938, -0.05095390, 0.0200…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.076517854, -0.094989697, -0.01…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> -0.058323691, 0.176933102, -0.00…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0313556909, -0.1527062868, 0.0…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> -0.0107523000, 0.0733111362, -0.…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> -0.192422552, 0.034755756, -0.09…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.146668951, -0.064738007, -0.05…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> -0.0788514278, 0.0558736640, 0.1…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0270392449, -0.0268237928, -0.…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> -0.1655294447, -0.1206864256, 0.…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0889911123, 0.1041612659, -0.1…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> -3.051628e-02, -5.000567e-02, 1.…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> -6.783110e-02, -1.940166e-01, -7…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 2.326022e-02, 9.314335e-02, 6.27…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> -1.250504e-02, -8.038957e-02, -1…
## $ date_sin1_K1 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos1_K1 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin1_K2 <dbl> 2.993631e-01, 2.993631e-01, 6.51…
## $ date_cos1_K2 <dbl> -0.95413926, -0.95413926, -0.758…
## $ date_sin1_K3 <dbl> -8.978045e-01, -8.978045e-01, -4…
## $ date_cos1_K3 <dbl> -0.4403942, -0.4403942, -0.87434…
## $ date_sin3_K1 <dbl> 4.554948e-01, 5.432217e-01, -9.9…
## $ date_cos3_K1 <dbl> 0.8902385, -0.8395893, -0.117956…
## $ date_sin3_K2 <dbl> 8.109979e-01, -9.121663e-01, 2.3…
## $ date_cos3_K2 <dbl> 0.58504900, 0.40982032, -0.97217…
## $ date_sin3_K3 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos3_K3 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin12_K1 <dbl> 9.930187e-01, 8.010011e-01, 4.09…
## $ date_cos12_K1 <dbl> -0.11795672, -0.59866288, -0.912…
## $ date_sin12_K2 <dbl> -2.342665e-01, -9.590592e-01, -7…
## $ date_cos12_K2 <dbl> -0.9721724, -0.2832055, 0.664094…
## $ date_sin12_K3 <dbl> -9.377521e-01, 3.473053e-01, 9.5…
## $ date_cos12_K3 <dbl> 3.473053e-01, 9.377521e-01, -2.9…
Calendar Spline features
# Spline variant of the calendar recipe: drop the raw `date` column, expand
# the column(s) whose name ends in "index.num" into a natural-spline basis
# with 2 degrees of freedom, then remove every column containing "_lag".
recipe_spec_spline <-
  recipe_calendar_spec %>%
  step_rm(date) %>%
  step_ns(ends_with("index.num"), deg_free = 2) %>%
  step_rm(contains("_lag"))

# Preview the processed training columns.
glimpse(juice(prep(recipe_spec_spline)))
## Rows: 39
## Columns: 103
## $ special <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ trans_change_yoy <dbl> -0.57704392, -0.20033185, -0.472…
## $ date_year <dbl> -1.3024299, -1.3024299, -1.30242…
## $ date_half <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1,…
## $ date_quarter <int> 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1,…
## $ date_month <int> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ date_month.lbl_01 <dbl> -0.2926847, -0.2090605, -0.12543…
## $ date_month.lbl_02 <dbl> 0.009124148, -0.155110522, -0.26…
## $ date_month.lbl_03 <dbl> 0.29268470, 0.34843417, 0.264809…
## $ date_month.lbl_04 <dbl> -0.3687669, -0.1452718, 0.134097…
## $ date_month.lbl_05 <dbl> 0.1664780, -0.2298982, -0.348811…
## $ date_month.lbl_06 <dbl> 0.16419739, 0.37317590, 0.059708…
## $ date_month.lbl_07 <dbl> -0.41291385, -0.13654123, 0.3355…
## $ date_month.lbl_08 <dbl> 0.46601138, -0.25454403, -0.2897…
## $ date_month.lbl_09 <dbl> -0.35520473, 0.47412790, -0.1596…
## $ date_month.lbl_10 <dbl> 0.190963389, -0.409207262, 0.491…
## $ date_month.lbl_11 <dbl> -0.065483987, 0.196451960, -0.39…
## $ date_month_x_date_month.lbl_01 <dbl> -0.8780541, -0.8362420, -0.62718…
## $ date_month_x_date_month.lbl_02 <dbl> 0.02737245, -0.62044209, -1.3230…
## $ date_month_x_date_month.lbl_03 <dbl> 0.8780541, 1.3937367, 1.3240498,…
## $ date_month_x_date_month.lbl_04 <dbl> -1.1063006, -0.5810872, 0.670485…
## $ date_month_x_date_month.lbl_05 <dbl> 0.4994341, -0.9195929, -1.744055…
## $ date_month_x_date_month.lbl_06 <dbl> 0.4925922, 1.4927036, 0.2985407,…
## $ date_month_x_date_month.lbl_07 <dbl> -1.23874154, -0.54616493, 1.6779…
## $ date_month_x_date_month.lbl_08 <dbl> 1.39803414, -1.01817612, -1.4489…
## $ date_month_x_date_month.lbl_09 <dbl> -1.06561418, 1.89651158, -0.7980…
## $ date_month_x_date_month.lbl_10 <dbl> 0.572890167, -1.636829049, 2.455…
## $ date_month_x_date_month.lbl_11 <dbl> -0.196451960, 0.785807840, -1.96…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> -0.002670499, 0.032427484, 0.033…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.085664336, -0.072843823, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> 0.10793242, 0.03037060, -0.01682…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.048725571, 0.048062638, 0.043…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.048058066, -0.078016340, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> 0.120853567, 0.028545379, -0.042…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.13639440, 0.05321510, 0.03635…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> 0.103962990, -0.099121416, 0.020…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.055892063, 0.085549076, -0.06…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> 0.0191661612, -0.0410703455, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> 0.002670499, -0.054045806, -0.07…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.003364684, 0.022533185, -0.03…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> 0.00151897, 0.03565963, 0.092295…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> 0.001498161, -0.057883508, -0.01…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.003767487, 0.021178982, -0.08…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> 0.004251957, 0.039482457, 0.0766…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.003240941, -0.073542225, 0.04…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> 0.001742378, 0.063472352, -0.129…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0005974856, -0.0304717661, 0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> -0.10793242, -0.05061766, 0.0355…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.04872557, -0.08010440, -0.0923…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.04805807, 0.13002723, 0.015811…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> -0.12085357, -0.04757563, 0.0888…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.136394402, -0.088691838, -0.07…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> -0.103962990, 0.165202360, -0.04…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.055892063, -0.142581793, 0.130…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> -0.0191661612, 0.0684505758, -0.…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> -0.06139158, 0.03339773, -0.0467…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> -0.060550561, -0.054211934, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.15226895, 0.01983559, 0.045002…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> -0.17184956, 0.03697807, -0.0388…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.130987739, -0.068877413, -0.02…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> -0.070420973, 0.059446276, 0.065…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0241483254, -0.0285389300, -0.…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.02733526, -0.08579248, -0.0208…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> -0.06874108, 0.03139059, -0.1170…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.07758065, 0.05851922, 0.101081…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> -0.05913378, -0.10900116, 0.0556…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.03179121, 0.09407602, -0.17128…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> -0.0109016446, -0.0451639563, 0.…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> -0.06779938, -0.05095390, 0.0200…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.076517854, -0.094989697, -0.01…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> -0.058323691, 0.176933102, -0.00…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0313556909, -0.1527062868, 0.0…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> -0.0107523000, 0.0733111362, -0.…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> -0.192422552, 0.034755756, -0.09…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.146668951, -0.064738007, -0.05…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> -0.0788514278, 0.0558736640, 0.1…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0270392449, -0.0268237928, -0.…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> -0.1655294447, -0.1206864256, 0.…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0889911123, 0.1041612659, -0.1…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> -3.051628e-02, -5.000567e-02, 1.…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> -6.783110e-02, -1.940166e-01, -7…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 2.326022e-02, 9.314335e-02, 6.27…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> -1.250504e-02, -8.038957e-02, -1…
## $ date_sin1_K1 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos1_K1 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin1_K2 <dbl> 2.993631e-01, 2.993631e-01, 6.51…
## $ date_cos1_K2 <dbl> -0.95413926, -0.95413926, -0.758…
## $ date_sin1_K3 <dbl> -8.978045e-01, -8.978045e-01, -4…
## $ date_cos1_K3 <dbl> -0.4403942, -0.4403942, -0.87434…
## $ date_sin3_K1 <dbl> 4.554948e-01, 5.432217e-01, -9.9…
## $ date_cos3_K1 <dbl> 0.8902385, -0.8395893, -0.117956…
## $ date_sin3_K2 <dbl> 8.109979e-01, -9.121663e-01, 2.3…
## $ date_cos3_K2 <dbl> 0.58504900, 0.40982032, -0.97217…
## $ date_sin3_K3 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos3_K3 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin12_K1 <dbl> 9.930187e-01, 8.010011e-01, 4.09…
## $ date_cos12_K1 <dbl> -0.11795672, -0.59866288, -0.912…
## $ date_sin12_K2 <dbl> -2.342665e-01, -9.590592e-01, -7…
## $ date_cos12_K2 <dbl> -0.9721724, -0.2832055, 0.664094…
## $ date_sin12_K3 <dbl> -9.377521e-01, 3.473053e-01, 9.5…
## $ date_cos12_K3 <dbl> 3.473053e-01, 9.377521e-01, -2.9…
## $ date_index.num_ns_1 <dbl> 0.00000000, 0.04082885, 0.080169…
## $ date_index.num_ns_2 <dbl> 0.00000000, -0.02721718, -0.0532…
# Same as the spline recipe, minus the `special` indicator column.
recipe_spec_spline_nospecial <- step_rm(recipe_spec_spline, special)

# Preview the processed training columns.
glimpse(juice(prep(recipe_spec_spline_nospecial)))
## Rows: 39
## Columns: 102
## $ trans_change_yoy <dbl> -0.57704392, -0.20033185, -0.472…
## $ date_year <dbl> -1.3024299, -1.3024299, -1.30242…
## $ date_half <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1,…
## $ date_quarter <int> 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1,…
## $ date_month <int> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ date_month.lbl_01 <dbl> -0.2926847, -0.2090605, -0.12543…
## $ date_month.lbl_02 <dbl> 0.009124148, -0.155110522, -0.26…
## $ date_month.lbl_03 <dbl> 0.29268470, 0.34843417, 0.264809…
## $ date_month.lbl_04 <dbl> -0.3687669, -0.1452718, 0.134097…
## $ date_month.lbl_05 <dbl> 0.1664780, -0.2298982, -0.348811…
## $ date_month.lbl_06 <dbl> 0.16419739, 0.37317590, 0.059708…
## $ date_month.lbl_07 <dbl> -0.41291385, -0.13654123, 0.3355…
## $ date_month.lbl_08 <dbl> 0.46601138, -0.25454403, -0.2897…
## $ date_month.lbl_09 <dbl> -0.35520473, 0.47412790, -0.1596…
## $ date_month.lbl_10 <dbl> 0.190963389, -0.409207262, 0.491…
## $ date_month.lbl_11 <dbl> -0.065483987, 0.196451960, -0.39…
## $ date_month_x_date_month.lbl_01 <dbl> -0.8780541, -0.8362420, -0.62718…
## $ date_month_x_date_month.lbl_02 <dbl> 0.02737245, -0.62044209, -1.3230…
## $ date_month_x_date_month.lbl_03 <dbl> 0.8780541, 1.3937367, 1.3240498,…
## $ date_month_x_date_month.lbl_04 <dbl> -1.1063006, -0.5810872, 0.670485…
## $ date_month_x_date_month.lbl_05 <dbl> 0.4994341, -0.9195929, -1.744055…
## $ date_month_x_date_month.lbl_06 <dbl> 0.4925922, 1.4927036, 0.2985407,…
## $ date_month_x_date_month.lbl_07 <dbl> -1.23874154, -0.54616493, 1.6779…
## $ date_month_x_date_month.lbl_08 <dbl> 1.39803414, -1.01817612, -1.4489…
## $ date_month_x_date_month.lbl_09 <dbl> -1.06561418, 1.89651158, -0.7980…
## $ date_month_x_date_month.lbl_10 <dbl> 0.572890167, -1.636829049, 2.455…
## $ date_month_x_date_month.lbl_11 <dbl> -0.196451960, 0.785807840, -1.96…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> -0.002670499, 0.032427484, 0.033…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.085664336, -0.072843823, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> 0.10793242, 0.03037060, -0.01682…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.048725571, 0.048062638, 0.043…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.048058066, -0.078016340, -0.0…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> 0.120853567, 0.028545379, -0.042…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.13639440, 0.05321510, 0.03635…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> 0.103962990, -0.099121416, 0.020…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.055892063, 0.085549076, -0.06…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> 0.0191661612, -0.0410703455, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> 0.002670499, -0.054045806, -0.07…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.003364684, 0.022533185, -0.03…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> 0.00151897, 0.03565963, 0.092295…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> 0.001498161, -0.057883508, -0.01…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.003767487, 0.021178982, -0.08…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> 0.004251957, 0.039482457, 0.0766…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.003240941, -0.073542225, 0.04…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> 0.001742378, 0.063472352, -0.129…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0005974856, -0.0304717661, 0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> -0.10793242, -0.05061766, 0.0355…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.04872557, -0.08010440, -0.0923…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.04805807, 0.13002723, 0.015811…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> -0.12085357, -0.04757563, 0.0888…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.136394402, -0.088691838, -0.07…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> -0.103962990, 0.165202360, -0.04…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.055892063, -0.142581793, 0.130…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> -0.0191661612, 0.0684505758, -0.…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> -0.06139158, 0.03339773, -0.0467…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> -0.060550561, -0.054211934, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.15226895, 0.01983559, 0.045002…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> -0.17184956, 0.03697807, -0.0388…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.130987739, -0.068877413, -0.02…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> -0.070420973, 0.059446276, 0.065…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0241483254, -0.0285389300, -0.…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.02733526, -0.08579248, -0.0208…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> -0.06874108, 0.03139059, -0.1170…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.07758065, 0.05851922, 0.101081…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> -0.05913378, -0.10900116, 0.0556…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.03179121, 0.09407602, -0.17128…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> -0.0109016446, -0.0451639563, 0.…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> -0.06779938, -0.05095390, 0.0200…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.076517854, -0.094989697, -0.01…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> -0.058323691, 0.176933102, -0.00…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0313556909, -0.1527062868, 0.0…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> -0.0107523000, 0.0733111362, -0.…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> -0.192422552, 0.034755756, -0.09…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.146668951, -0.064738007, -0.05…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> -0.0788514278, 0.0558736640, 0.1…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0270392449, -0.0268237928, -0.…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> -0.1655294447, -0.1206864256, 0.…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0889911123, 0.1041612659, -0.1…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> -3.051628e-02, -5.000567e-02, 1.…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> -6.783110e-02, -1.940166e-01, -7…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 2.326022e-02, 9.314335e-02, 6.27…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> -1.250504e-02, -8.038957e-02, -1…
## $ date_sin1_K1 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos1_K1 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin1_K2 <dbl> 2.993631e-01, 2.993631e-01, 6.51…
## $ date_cos1_K2 <dbl> -0.95413926, -0.95413926, -0.758…
## $ date_sin1_K3 <dbl> -8.978045e-01, -8.978045e-01, -4…
## $ date_cos1_K3 <dbl> -0.4403942, -0.4403942, -0.87434…
## $ date_sin3_K1 <dbl> 4.554948e-01, 5.432217e-01, -9.9…
## $ date_cos3_K1 <dbl> 0.8902385, -0.8395893, -0.117956…
## $ date_sin3_K2 <dbl> 8.109979e-01, -9.121663e-01, 2.3…
## $ date_cos3_K2 <dbl> 0.58504900, 0.40982032, -0.97217…
## $ date_sin3_K3 <dbl> 9.884683e-01, 9.884683e-01, 9.37…
## $ date_cos3_K3 <dbl> 0.1514278, 0.1514278, 0.3473053,…
## $ date_sin12_K1 <dbl> 9.930187e-01, 8.010011e-01, 4.09…
## $ date_cos12_K1 <dbl> -0.11795672, -0.59866288, -0.912…
## $ date_sin12_K2 <dbl> -2.342665e-01, -9.590592e-01, -7…
## $ date_cos12_K2 <dbl> -0.9721724, -0.2832055, 0.664094…
## $ date_sin12_K3 <dbl> -9.377521e-01, 3.473053e-01, 9.5…
## $ date_cos12_K3 <dbl> 3.473053e-01, 9.377521e-01, -2.9…
## $ date_index.num_ns_1 <dbl> 0.00000000, 0.04082885, 0.080169…
## $ date_index.num_ns_2 <dbl> 0.00000000, -0.02721718, -0.0532…
Calendar Lag features
# Lag variant of the calendar recipe: drop the raw `date` column, then
# discard rows that have missing values in any column containing "_lag".
recipe_spec_lag <- step_naomit(
  step_rm(recipe_calendar_spec, date),
  contains("_lag")
)

# Preview the processed training columns.
glimpse(juice(prep(recipe_spec_lag)))
## Rows: 31
## Columns: 106
## $ trans_change_yoy_lag3 <dbl> -0.32048000, -0.35386331, -0.330…
## $ trans_change_yoy_lag3_roll_1 <dbl> -0.32048000, -0.35386331, -0.330…
## $ trans_change_yoy_lag3_roll_3 <dbl> -0.3068206, -0.3348086, -0.35617…
## $ trans_change_yoy_lag3_roll_12 <dbl> -0.315884692, -0.378366625, -0.4…
## $ special <dbl> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,…
## $ trans_change_yoy <dbl> -0.38459128, -0.25773221, -0.139…
## $ date_index.num <dbl> -0.963506467, -0.877116509, -0.7…
## $ date_year <dbl> -1.3024299, -1.3024299, -0.30645…
## $ date_half <int> 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2,…
## $ date_quarter <int> 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3,…
## $ date_month <int> 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, …
## $ date_month.lbl_01 <dbl> 0.3763089, 0.4599331, -0.4599331…
## $ date_month.lbl_02 <dbl> 0.228103709, 0.501828160, 0.5018…
## $ date_month.lbl_03 <dbl> -0.04181210, 0.45993311, -0.4599…
## $ date_month.lbl_04 <dbl> -0.3017184, 0.3687669, 0.3687669…
## $ date_month.lbl_05 <dbl> -0.4518689, 0.2616083, -0.261608…
## $ date_month.lbl_06 <dbl> -0.46273811, 0.16419739, 0.16419…
## $ date_month.lbl_07 <dbl> -0.37014190, 0.09047913, -0.0904…
## $ date_month.lbl_08 <dbl> -0.23887978, 0.04307668, 0.04307…
## $ date_month.lbl_09 <dbl> -0.12361750, 0.01721256, -0.0172…
## $ date_month.lbl_10 <dbl> -0.049104871, 0.005456097, 0.005…
## $ date_month.lbl_11 <dbl> -0.013096797, 0.001190618, -0.00…
## $ date_month_x_date_month.lbl_01 <dbl> 4.1393979, 5.5191973, -0.4599331…
## $ date_month_x_date_month.lbl_02 <dbl> 2.50914080, 6.02193792, 0.501828…
## $ date_month_x_date_month.lbl_03 <dbl> -0.4599331, 5.5191973, -0.459933…
## $ date_month_x_date_month.lbl_04 <dbl> -3.3189019, 4.4252025, 0.3687669…
## $ date_month_x_date_month.lbl_05 <dbl> -4.9705581, 3.1392999, -0.261608…
## $ date_month_x_date_month.lbl_06 <dbl> -5.0901192, 1.9703687, 0.1641974…
## $ date_month_x_date_month.lbl_07 <dbl> -4.07156085, 1.08574956, -0.0904…
## $ date_month_x_date_month.lbl_08 <dbl> -2.62767761, 0.51692019, 0.04307…
## $ date_month_x_date_month.lbl_09 <dbl> -1.35979255, 0.20655077, -0.0172…
## $ date_month_x_date_month.lbl_10 <dbl> -0.540153586, 0.065473162, 0.005…
## $ date_month_x_date_month.lbl_11 <dbl> -0.144064771, 0.014287415, -0.00…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> 0.085837457, 0.230807384, -0.230…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.015734266, 0.211538462, 0.211…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> -0.11353930, 0.16960810, -0.1696…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.170042298, 0.120322328, 0.120…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.174132472, 0.075519818, -0.07…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> -0.139287691, 0.041614347, 0.041…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.08989259, 0.01981239, -0.0198…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> -0.046518368, 0.007916628, 0.007…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.018478600, 0.002509440, -0.00…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> -0.0049284415, 0.0005476046, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> -0.009537495, 0.230807384, -0.23…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.068823076, 0.185057604, 0.185…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> -0.10307298, 0.13128242, -0.1312…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> -0.105552279, 0.082398876, 0.082…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.084430739, 0.045404975, -0.04…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> -0.054489364, 0.021617092, 0.021…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.028197611, 0.008637749, -0.00…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> -0.011201003, 0.002738023, 0.002…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0029874280, 0.0005974856, -0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> 0.01261548, 0.16960810, -0.16960…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.01889359, 0.12032233, 0.120322…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.01934805, 0.07551982, -0.07551…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> 0.01547641, 0.04161435, 0.041614…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.009988065, 0.019812392, -0.019…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> 0.005168708, 0.007916628, 0.0079…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.002053178, 0.002509440, -0.002…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> 0.0005476046, 0.0005476046, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> 0.13633715, 0.09647248, -0.09647…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> 0.139616582, 0.060550561, 0.0605…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.11167860, 0.03336571, -0.03336…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> 0.07207442, 0.01588525, 0.015885…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.037297670, 0.006347423, -0.006…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> 0.014815841, 0.002012028, 0.0020…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0039515442, 0.0004390605, -0.0…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.20909697, 0.04295540, -0.04295…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> 0.16725562, 0.02367009, 0.023670…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.10794235, 0.01126922, -0.01126…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> 0.05585891, 0.00450295, 0.004502…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.02218897, 0.00142736, -0.00142…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> 0.0059180357, 0.0003114756, 0.00…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> 0.17127876, 0.01485644, -0.01485…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.110538779, 0.007073079, 0.0070…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> 0.057202530, 0.002826258, -0.002…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0227226955, 0.0008958769, 0.00…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> 0.0060603873, 0.0001954964, -0.0…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> 0.088419416, 0.003897541, -0.003…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.045756017, 0.001557378, 0.0015…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> 0.0181757702, 0.0004936629, -0.0…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0048476734, 0.0001077261, 0.00…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> 0.0295297225, 0.0007414601, -0.0…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0117301610, 0.0002350305, 0.00…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> 3.128560e-03, 5.128787e-05, -5.1…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> 6.070222e-03, 9.391342e-05, -9.3…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 1.618993e-03, 2.049359e-05, 2.04…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> 6.431165e-04, 6.496127e-06, -6.4…
## $ date_sin1_K1 <dbl> 7.247928e-01, 5.712682e-01, 5.71…
## $ date_cos1_K1 <dbl> 0.68896692, 0.82076344, 0.820763…
## $ date_sin1_K2 <dbl> 9.987165e-01, 9.377521e-01, 9.37…
## $ date_cos1_K2 <dbl> -0.05064917, 0.34730525, 0.34730…
## $ date_sin1_K3 <dbl> 6.513725e-01, 9.680771e-01, 9.68…
## $ date_cos1_K3 <dbl> -0.7587581, -0.2506525, -0.25065…
## $ date_sin3_K1 <dbl> -9.680771e-01, 2.012985e-01, 7.4…
## $ date_cos3_K1 <dbl> -0.25065253, 0.97952994, -0.6640…
## $ date_sin3_K2 <dbl> 4.853020e-01, 3.943559e-01, -9.9…
## $ date_cos3_K2 <dbl> -0.8743466, 0.9189578, -0.117956…
## $ date_sin3_K3 <dbl> 7.247928e-01, 5.712682e-01, 5.71…
## $ date_cos3_K3 <dbl> 0.68896692, 0.82076344, 0.820763…
## $ date_sin12_K1 <dbl> -4.403942e-01, 5.064917e-02, 5.4…
## $ date_cos12_K1 <dbl> 0.89780454, 0.99871651, 0.839589…
## $ date_sin12_K2 <dbl> -7.907757e-01, 1.011683e-01, 9.1…
## $ date_cos12_K2 <dbl> 0.6121060, 0.9948693, 0.4098203,…
## $ date_sin12_K3 <dbl> -9.795299e-01, 1.514278e-01, 9.8…
## $ date_cos12_K3 <dbl> 2.012985e-01, 9.884683e-01, -1.5…
# Same as the lag recipe, minus the `special` indicator column.
recipe_spec_lag_nospecial <- step_rm(recipe_spec_lag, special)

# Preview the processed training columns.
glimpse(juice(prep(recipe_spec_lag_nospecial)))
## Rows: 31
## Columns: 105
## $ trans_change_yoy_lag3 <dbl> -0.32048000, -0.35386331, -0.330…
## $ trans_change_yoy_lag3_roll_1 <dbl> -0.32048000, -0.35386331, -0.330…
## $ trans_change_yoy_lag3_roll_3 <dbl> -0.3068206, -0.3348086, -0.35617…
## $ trans_change_yoy_lag3_roll_12 <dbl> -0.315884692, -0.378366625, -0.4…
## $ trans_change_yoy <dbl> -0.38459128, -0.25773221, -0.139…
## $ date_index.num <dbl> -0.963506467, -0.877116509, -0.7…
## $ date_year <dbl> -1.3024299, -1.3024299, -0.30645…
## $ date_half <int> 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2,…
## $ date_quarter <int> 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3,…
## $ date_month <int> 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, …
## $ date_month.lbl_01 <dbl> 0.3763089, 0.4599331, -0.4599331…
## $ date_month.lbl_02 <dbl> 0.228103709, 0.501828160, 0.5018…
## $ date_month.lbl_03 <dbl> -0.04181210, 0.45993311, -0.4599…
## $ date_month.lbl_04 <dbl> -0.3017184, 0.3687669, 0.3687669…
## $ date_month.lbl_05 <dbl> -0.4518689, 0.2616083, -0.261608…
## $ date_month.lbl_06 <dbl> -0.46273811, 0.16419739, 0.16419…
## $ date_month.lbl_07 <dbl> -0.37014190, 0.09047913, -0.0904…
## $ date_month.lbl_08 <dbl> -0.23887978, 0.04307668, 0.04307…
## $ date_month.lbl_09 <dbl> -0.12361750, 0.01721256, -0.0172…
## $ date_month.lbl_10 <dbl> -0.049104871, 0.005456097, 0.005…
## $ date_month.lbl_11 <dbl> -0.013096797, 0.001190618, -0.00…
## $ date_month_x_date_month.lbl_01 <dbl> 4.1393979, 5.5191973, -0.4599331…
## $ date_month_x_date_month.lbl_02 <dbl> 2.50914080, 6.02193792, 0.501828…
## $ date_month_x_date_month.lbl_03 <dbl> -0.4599331, 5.5191973, -0.459933…
## $ date_month_x_date_month.lbl_04 <dbl> -3.3189019, 4.4252025, 0.3687669…
## $ date_month_x_date_month.lbl_05 <dbl> -4.9705581, 3.1392999, -0.261608…
## $ date_month_x_date_month.lbl_06 <dbl> -5.0901192, 1.9703687, 0.1641974…
## $ date_month_x_date_month.lbl_07 <dbl> -4.07156085, 1.08574956, -0.0904…
## $ date_month_x_date_month.lbl_08 <dbl> -2.62767761, 0.51692019, 0.04307…
## $ date_month_x_date_month.lbl_09 <dbl> -1.35979255, 0.20655077, -0.0172…
## $ date_month_x_date_month.lbl_10 <dbl> -0.540153586, 0.065473162, 0.005…
## $ date_month_x_date_month.lbl_11 <dbl> -0.144064771, 0.014287415, -0.00…
## $ date_month.lbl_01_x_date_month.lbl_02 <dbl> 0.085837457, 0.230807384, -0.230…
## $ date_month.lbl_01_x_date_month.lbl_03 <dbl> -0.015734266, 0.211538462, 0.211…
## $ date_month.lbl_01_x_date_month.lbl_04 <dbl> -0.11353930, 0.16960810, -0.1696…
## $ date_month.lbl_01_x_date_month.lbl_05 <dbl> -0.170042298, 0.120322328, 0.120…
## $ date_month.lbl_01_x_date_month.lbl_06 <dbl> -0.174132472, 0.075519818, -0.07…
## $ date_month.lbl_01_x_date_month.lbl_07 <dbl> -0.139287691, 0.041614347, 0.041…
## $ date_month.lbl_01_x_date_month.lbl_08 <dbl> -0.08989259, 0.01981239, -0.0198…
## $ date_month.lbl_01_x_date_month.lbl_09 <dbl> -0.046518368, 0.007916628, 0.007…
## $ date_month.lbl_01_x_date_month.lbl_10 <dbl> -0.018478600, 0.002509440, -0.00…
## $ date_month.lbl_01_x_date_month.lbl_11 <dbl> -0.0049284415, 0.0005476046, 0.0…
## $ date_month.lbl_02_x_date_month.lbl_03 <dbl> -0.009537495, 0.230807384, -0.23…
## $ date_month.lbl_02_x_date_month.lbl_04 <dbl> -0.068823076, 0.185057604, 0.185…
## $ date_month.lbl_02_x_date_month.lbl_05 <dbl> -0.10307298, 0.13128242, -0.1312…
## $ date_month.lbl_02_x_date_month.lbl_06 <dbl> -0.105552279, 0.082398876, 0.082…
## $ date_month.lbl_02_x_date_month.lbl_07 <dbl> -0.084430739, 0.045404975, -0.04…
## $ date_month.lbl_02_x_date_month.lbl_08 <dbl> -0.054489364, 0.021617092, 0.021…
## $ date_month.lbl_02_x_date_month.lbl_09 <dbl> -0.028197611, 0.008637749, -0.00…
## $ date_month.lbl_02_x_date_month.lbl_10 <dbl> -0.011201003, 0.002738023, 0.002…
## $ date_month.lbl_02_x_date_month.lbl_11 <dbl> -0.0029874280, 0.0005974856, -0.…
## $ date_month.lbl_03_x_date_month.lbl_04 <dbl> 0.01261548, 0.16960810, -0.16960…
## $ date_month.lbl_03_x_date_month.lbl_05 <dbl> 0.01889359, 0.12032233, 0.120322…
## $ date_month.lbl_03_x_date_month.lbl_06 <dbl> 0.01934805, 0.07551982, -0.07551…
## $ date_month.lbl_03_x_date_month.lbl_07 <dbl> 0.01547641, 0.04161435, 0.041614…
## $ date_month.lbl_03_x_date_month.lbl_08 <dbl> 0.009988065, 0.019812392, -0.019…
## $ date_month.lbl_03_x_date_month.lbl_09 <dbl> 0.005168708, 0.007916628, 0.0079…
## $ date_month.lbl_03_x_date_month.lbl_10 <dbl> 0.002053178, 0.002509440, -0.002…
## $ date_month.lbl_03_x_date_month.lbl_11 <dbl> 0.0005476046, 0.0005476046, 0.00…
## $ date_month.lbl_04_x_date_month.lbl_05 <dbl> 0.13633715, 0.09647248, -0.09647…
## $ date_month.lbl_04_x_date_month.lbl_06 <dbl> 0.139616582, 0.060550561, 0.0605…
## $ date_month.lbl_04_x_date_month.lbl_07 <dbl> 0.11167860, 0.03336571, -0.03336…
## $ date_month.lbl_04_x_date_month.lbl_08 <dbl> 0.07207442, 0.01588525, 0.015885…
## $ date_month.lbl_04_x_date_month.lbl_09 <dbl> 0.037297670, 0.006347423, -0.006…
## $ date_month.lbl_04_x_date_month.lbl_10 <dbl> 0.014815841, 0.002012028, 0.0020…
## $ date_month.lbl_04_x_date_month.lbl_11 <dbl> 0.0039515442, 0.0004390605, -0.0…
## $ date_month.lbl_05_x_date_month.lbl_06 <dbl> 0.20909697, 0.04295540, -0.04295…
## $ date_month.lbl_05_x_date_month.lbl_07 <dbl> 0.16725562, 0.02367009, 0.023670…
## $ date_month.lbl_05_x_date_month.lbl_08 <dbl> 0.10794235, 0.01126922, -0.01126…
## $ date_month.lbl_05_x_date_month.lbl_09 <dbl> 0.05585891, 0.00450295, 0.004502…
## $ date_month.lbl_05_x_date_month.lbl_10 <dbl> 0.02218897, 0.00142736, -0.00142…
## $ date_month.lbl_05_x_date_month.lbl_11 <dbl> 0.0059180357, 0.0003114756, 0.00…
## $ date_month.lbl_06_x_date_month.lbl_07 <dbl> 0.17127876, 0.01485644, -0.01485…
## $ date_month.lbl_06_x_date_month.lbl_08 <dbl> 0.110538779, 0.007073079, 0.0070…
## $ date_month.lbl_06_x_date_month.lbl_09 <dbl> 0.057202530, 0.002826258, -0.002…
## $ date_month.lbl_06_x_date_month.lbl_10 <dbl> 0.0227226955, 0.0008958769, 0.00…
## $ date_month.lbl_06_x_date_month.lbl_11 <dbl> 0.0060603873, 0.0001954964, -0.0…
## $ date_month.lbl_07_x_date_month.lbl_08 <dbl> 0.088419416, 0.003897541, -0.003…
## $ date_month.lbl_07_x_date_month.lbl_09 <dbl> 0.045756017, 0.001557378, 0.0015…
## $ date_month.lbl_07_x_date_month.lbl_10 <dbl> 0.0181757702, 0.0004936629, -0.0…
## $ date_month.lbl_07_x_date_month.lbl_11 <dbl> 0.0048476734, 0.0001077261, 0.00…
## $ date_month.lbl_08_x_date_month.lbl_09 <dbl> 0.0295297225, 0.0007414601, -0.0…
## $ date_month.lbl_08_x_date_month.lbl_10 <dbl> 0.0117301610, 0.0002350305, 0.00…
## $ date_month.lbl_08_x_date_month.lbl_11 <dbl> 3.128560e-03, 5.128787e-05, -5.1…
## $ date_month.lbl_09_x_date_month.lbl_10 <dbl> 6.070222e-03, 9.391342e-05, -9.3…
## $ date_month.lbl_09_x_date_month.lbl_11 <dbl> 1.618993e-03, 2.049359e-05, 2.04…
## $ date_month.lbl_10_x_date_month.lbl_11 <dbl> 6.431165e-04, 6.496127e-06, -6.4…
## $ date_sin1_K1 <dbl> 7.247928e-01, 5.712682e-01, 5.71…
## $ date_cos1_K1 <dbl> 0.68896692, 0.82076344, 0.820763…
## $ date_sin1_K2 <dbl> 9.987165e-01, 9.377521e-01, 9.37…
## $ date_cos1_K2 <dbl> -0.05064917, 0.34730525, 0.34730…
## $ date_sin1_K3 <dbl> 6.513725e-01, 9.680771e-01, 9.68…
## $ date_cos1_K3 <dbl> -0.7587581, -0.2506525, -0.25065…
## $ date_sin3_K1 <dbl> -9.680771e-01, 2.012985e-01, 7.4…
## $ date_cos3_K1 <dbl> -0.25065253, 0.97952994, -0.6640…
## $ date_sin3_K2 <dbl> 4.853020e-01, 3.943559e-01, -9.9…
## $ date_cos3_K2 <dbl> -0.8743466, 0.9189578, -0.117956…
## $ date_sin3_K3 <dbl> 7.247928e-01, 5.712682e-01, 5.71…
## $ date_cos3_K3 <dbl> 0.68896692, 0.82076344, 0.820763…
## $ date_sin12_K1 <dbl> -4.403942e-01, 5.064917e-02, 5.4…
## $ date_cos12_K1 <dbl> 0.89780454, 0.99871651, 0.839589…
## $ date_sin12_K2 <dbl> -7.907757e-01, 1.011683e-01, 9.1…
## $ date_cos12_K2 <dbl> 0.6121060, 0.9948693, 0.4098203,…
## $ date_sin12_K3 <dbl> -9.795299e-01, 1.514278e-01, 9.8…
## $ date_cos12_K3 <dbl> 2.012985e-01, 9.884683e-01, -1.5…
Save the recipes:
# Bundle every recipe variant into one named list and write it to
# "data/recipes.rds" so later steps can reload them.
write_rds(
  list(
    recipe_calendar_spec = recipe_calendar_spec,
    recipe_calendar_spec_nolag = recipe_calendar_spec_nolag,
    recipe_calendar_spec_lag = recipe_calendar_spec_lag,
    recipe_spec_spline = recipe_spec_spline,
    recipe_spec_spline_nospecial = recipe_spec_spline_nospecial,
    recipe_spec_lag = recipe_spec_lag,
    recipe_spec_lag_nospecial = recipe_spec_lag_nospecial
  ),
  "data/recipes.rds"
)
Fit the models and compare them:
# Penalised linear regression (glmnet engine, penalty = 0.1) using the spline
# recipe without the `special` column, fitted on the training split.
workflow_lm_spline_nospecial_fit <- fit(
  workflow(
    recipe_spec_spline_nospecial,
    set_engine(linear_reg(penalty = 0.1), "glmnet")
  ),
  training(splits)
)
# Penalised linear regression (glmnet engine, penalty = 0.1) using the lag
# recipe without the `special` column, fitted on the training split.
workflow_lm_lag_nospecial_fit <- fit(
  workflow(
    recipe_spec_lag_nospecial,
    set_engine(linear_reg(penalty = 0.1), "glmnet")
  ),
  training(splits)
)
# Penalised linear regression (glmnet engine, penalty = 0.1) using the full
# spline recipe, fitted on the training split.
workflow_lm_spline_fit <- fit(
  workflow(
    recipe_spec_spline,
    set_engine(linear_reg(penalty = 0.1), "glmnet")
  ),
  training(splits)
)
# Penalised linear regression (glmnet engine, penalty = 0.1) using the full
# lag recipe, fitted on the training split.
workflow_lm_lag_fit <- fit(
  workflow(
    recipe_spec_lag,
    set_engine(linear_reg(penalty = 0.1), "glmnet")
  ),
  training(splits)
)
# ARIMA workflow with a single non-seasonal autoregressive term. The recipe
# models `trans_change_yoy` from `date` only; fitting uses the training split.
workflow_fit_arima_non_seasonal_ar <- fit(
  workflow(
    recipe(trans_change_yoy ~ date, data = training(splits)),
    set_engine(arima_reg(non_seasonal_ar = 1), "arima")
  ),
  training(splits)
)
## frequency = 11 observations per 1 year
# Fixed-order ARIMA: one non-seasonal AR term plus first-order differencing.
workflow_fit_arima_non_seasonal_differences <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    arima_reg(
      non_seasonal_ar = 1,
      non_seasonal_differences = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# Fixed-order ARIMA with non-seasonal orders AR = 1, differences = 1, MA = 1,
# fitted on the training split with `date` as the only predictor.
# The argument list must not end with a comma: a trailing comma passes an
# empty positional argument to arima_reg().
workflow_fit_arima_non_seasonal_ma <-
  recipe(trans_change_yoy ~ date, data = training(splits)) %>%
  workflow(
    arima_reg(
      non_seasonal_ar = 1,
      non_seasonal_differences = 1,
      non_seasonal_ma = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(training(splits))
## frequency = 11 observations per 1 year
# Seasonal ARIMA variants (seasonal_period = 3), each adding one more seasonal
# component on top of the non-seasonal (1, 1, 1) orders. All are fitted on the
# training split with `date` as the only predictor.
# The argument lists must not end with a comma: a trailing comma passes an
# empty positional argument to arima_reg().

# Seasonal AR term only.
workflow_fit_arima_seasonal <-
  recipe(trans_change_yoy ~ date, data = training(splits)) %>%
  workflow(
    arima_reg(
      seasonal_period = 3,
      seasonal_ar = 1,
      non_seasonal_ar = 1,
      non_seasonal_differences = 1,
      non_seasonal_ma = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(training(splits))

# Seasonal AR term plus seasonal differencing.
workflow_fit_arima_seasonal_differences <-
  recipe(trans_change_yoy ~ date, data = training(splits)) %>%
  workflow(
    arima_reg(
      seasonal_period = 3,
      seasonal_ar = 1,
      seasonal_differences = 1,
      non_seasonal_ar = 1,
      non_seasonal_differences = 1,
      non_seasonal_ma = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(training(splits))

# Seasonal AR, seasonal differencing and seasonal MA.
workflow_fit_arima_seasonal_ma <-
  recipe(trans_change_yoy ~ date, data = training(splits)) %>%
  workflow(
    arima_reg(
      seasonal_period = 3,
      seasonal_ar = 1,
      seasonal_differences = 1,
      seasonal_ma = 1,
      non_seasonal_ar = 1,
      non_seasonal_differences = 1,
      non_seasonal_ma = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(training(splits))
# Seasonal ARIMA with second-order seasonal AR and MA terms
# (seasonal_period = 3) on top of the non-seasonal (1, 1, 1) orders.
workflow_fit_arima_seasonal_2 <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    arima_reg(
      seasonal_period = 3,
      seasonal_ar = 2,
      seasonal_differences = 1,
      seasonal_ma = 2,
      non_seasonal_ar = 1,
      non_seasonal_differences = 1,
      non_seasonal_ma = 1
    ) %>%
      set_engine("arima")
  ) %>%
  fit(data = training(splits))
# Baseline: auto_arima engine with no orders specified, `date` only.
workflow_fit_auto_arima_baseline <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(arima_reg() %>% set_engine("auto_arima")) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# auto_arima with Fourier terms on `date` (periods 3 and 12, K = 3 each)
# added by the recipe.
workflow_fit_auto_arima_fourier <- workflow() %>%
  add_recipe(
    recipe(trans_change_yoy ~ date, data = training(splits)) %>%
      step_fourier(date, period = 3, K = 3) %>%
      step_fourier(date, period = 12, K = 3)
  ) %>%
  add_model(arima_reg() %>% set_engine("auto_arima")) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# Same Fourier-term auto_arima, with the `special` column added as a
# predictor.
workflow_fit_auto_arima_fourier_events <- workflow() %>%
  add_recipe(
    recipe(trans_change_yoy ~ date + special, data = training(splits)) %>%
      step_fourier(date, period = 3, K = 3) %>%
      step_fourier(date, period = 12, K = 3)
  ) %>%
  add_model(arima_reg() %>% set_engine("auto_arima")) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# Fourier-term auto_arima with `special`, plus a labelled month feature and a
# numeric year feature derived from `date`.
workflow_fit_auto_arima_label <- workflow() %>%
  add_recipe(
    recipe(trans_change_yoy ~ date + special, data = training(splits)) %>%
      step_fourier(date, period = 3, K = 3) %>%
      step_fourier(date, period = 12, K = 3) %>%
      step_date(date, features = "month", label = TRUE) %>%
      step_date(date, features = "year", label = FALSE)
  ) %>%
  add_model(arima_reg() %>% set_engine("auto_arima")) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# Prophet with yearly seasonality only, using `date` as the sole predictor.
workflow_fit_prophet <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    prophet_reg(
      changepoint_num = 25,
      changepoint_range = 0.8,
      seasonality_yearly = TRUE,
      seasonality_weekly = FALSE,
      seasonality_daily = FALSE
    ) %>%
      set_engine("prophet")
  ) %>%
  fit(data = training(splits))

# Same Prophet settings, with `special` added as an extra predictor.
workflow_fit_prophet_xregs <- workflow() %>%
  add_recipe(
    recipe(trans_change_yoy ~ date + special, data = training(splits))
  ) %>%
  add_model(
    prophet_reg(
      changepoint_num = 25,
      changepoint_range = 0.8,
      seasonality_yearly = TRUE,
      seasonality_weekly = FALSE,
      seasonality_daily = FALSE
    ) %>%
      set_engine("prophet")
  ) %>%
  fit(data = training(splits))
# Exponential smoothing (ets engine) with additive error, trend and season.
workflow_fit_ets <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    exp_smoothing(
      error = "additive",
      trend = "additive",
      season = "additive"
    ) %>%
      set_engine("ets")
  ) %>%
  fit(data = training(splits))
## frequency = 11 observations per 1 year
# Multi-seasonal models (seasonal periods 3, 6 and 12) on three engines.
# The spec is repeated inline for each fit, as each uses a different engine.

# TBATS engine.
workflow_fit_tbats <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    seasonal_reg(
      seasonal_period_1 = 3,
      seasonal_period_2 = 6,
      seasonal_period_3 = 12
    ) %>%
      set_engine("tbats")
  ) %>%
  fit(data = training(splits))

# stlm_ets engine.
workflow_fit_stlm_ets <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    seasonal_reg(
      seasonal_period_1 = 3,
      seasonal_period_2 = 6,
      seasonal_period_3 = 12
    ) %>%
      set_engine("stlm_ets")
  ) %>%
  fit(data = training(splits))

# stlm_arima engine.
workflow_fit_stlm_arima <- workflow() %>%
  add_recipe(recipe(trans_change_yoy ~ date, data = training(splits))) %>%
  add_model(
    seasonal_reg(
      seasonal_period_1 = 3,
      seasonal_period_2 = 6,
      seasonal_period_3 = 12
    ) %>%
      set_engine("stlm_arima")
  ) %>%
  fit(data = training(splits))

# stlm_arima engine with `special` added to the recipe formula.
workflow_fit_stlm_arima_xregs <- workflow() %>%
  add_recipe(
    recipe(trans_change_yoy ~ date + special, data = training(splits))
  ) %>%
  add_model(
    seasonal_reg(
      seasonal_period_1 = 3,
      seasonal_period_2 = 6,
      seasonal_period_3 = 12
    ) %>%
      set_engine("stlm_arima")
  ) %>%
  fit(data = training(splits))
# MARS (earth engine): one shared spec, fitted with three different recipes.
model_spec_mars <- set_engine(
  mars(mode = "regression", num_terms = 10),
  "earth",
  endspan = 24
)

# `date` converted to numeric is the only predictor.
workflow_fit_mars_simple <- workflow(
  recipe(trans_change_yoy ~ date, data = training(splits)) %>%
    step_mutate(date = as.numeric(date)),
  model_spec_mars
) %>%
  fit(data = training(splits))

workflow_fit_mars_spline <- workflow(recipe_spec_spline, model_spec_mars) %>%
  fit(data = training(splits))

workflow_fit_mars_lag <- workflow(recipe_spec_lag, model_spec_mars) %>%
  fit(data = training(splits))
# Polynomial-kernel SVM (kernlab engine, degree = 1), fitted with the spline
# and lag recipes. The seed is reset before each fit.
model_spec_svm_poly <- set_engine(
  svm_poly(
    mode = "regression",
    cost = 10,
    degree = 1,
    scale_factor = 1,
    margin = 0.1
  ),
  "kernlab"
)

set.seed(123)
workflow_fit_svm_poly_spline <-
  workflow(recipe_spec_spline, model_spec_svm_poly) %>%
  fit(data = training(splits))

set.seed(123)
workflow_fit_svm_poly_lag <-
  workflow(recipe_spec_lag, model_spec_svm_poly) %>%
  fit(data = training(splits))
# Radial-basis SVM (kernlab engine), fitted with the spline and lag recipes.
model_spec_svm_rbf <- set_engine(
  svm_rbf(
    mode = "regression",
    cost = 1,
    rbf_sigma = 0.01,
    margin = 0.1
  ),
  "kernlab"
)

workflow_fit_svm_rbf_spline <-
  workflow(recipe_spec_spline, model_spec_svm_rbf) %>%
  fit(data = training(splits))

# The lag variant reuses the spline workflow and swaps only the recipe.
workflow_fit_svm_rbf_lag <- fit(
  update_recipe(workflow_fit_svm_rbf_spline, recipe_spec_lag),
  data = training(splits)
)
# K-nearest neighbours (kknn engine) fitted with the spline recipe.
# NOTE(review): neighbors = 50 is larger than the number of training rows the
# engine reports, so kknn warns and uses fewer neighbours.
model_spec_knn <- set_engine(
  nearest_neighbor(
    mode = "regression",
    neighbors = 50,
    dist_power = 10,
    weight_func = "optimal"
  ),
  "kknn"
)

set.seed(123)
workflow_fit_knn_spline <- workflow(recipe_spec_spline, model_spec_knn) %>%
  fit(data = training(splits))
## Warning: 50 samples were requested but there were 39 rows in the data. 34 will
## be used.
# Lag-recipe variant of the KNN workflow: same model, recipe swapped, refit.
set.seed(123)
workflow_fit_knn_lag <- fit(
  update_recipe(workflow_fit_knn_spline, recipe_spec_lag),
  data = training(splits)
)
## Warning: 50 samples were requested but there were 31 rows in the data. 26 will
## be used.
# Random forest (ranger engine), fitted with the spline and lag recipes.
# The seed is reset before each fit.
model_spec_rf <- set_engine(
  rand_forest(
    mode = "regression",
    mtry = 25,
    trees = 1000,
    min_n = 25
  ),
  "ranger"
)

set.seed(123)
workflow_fit_rf_spline <- workflow(recipe_spec_spline, model_spec_rf) %>%
  fit(data = training(splits))

set.seed(123)
workflow_fit_rf_lag <- fit(
  update_recipe(workflow_fit_rf_spline, recipe_spec_lag),
  data = training(splits)
)
# Gradient-boosted trees (xgboost engine), fitted with the spline and lag
# recipes. The seed is reset before each fit.
model_spec_boost <- set_engine(
  boost_tree(
    mode = "regression",
    mtry = 25,
    trees = 1000,
    min_n = 2,
    tree_depth = 12,
    learn_rate = 0.3,
    loss_reduction = 0
  ),
  "xgboost"
)

set.seed(123)
workflow_fit_xgboost_spline <-
  workflow(recipe_spec_spline, model_spec_boost) %>%
  fit(data = training(splits))

set.seed(123)
workflow_fit_xgboost_lag <- fit(
  update_recipe(workflow_fit_xgboost_spline, recipe_spec_lag),
  data = training(splits)
)
# Cubist rule-based model, fitted with the spline and lag recipes.
# The seed is reset before each fit.
model_spec_cubist <- set_engine(
  cubist_rules(
    committees = 50,
    neighbors = 7,
    max_rules = 100
  ),
  "Cubist"
)

set.seed(123)
workflow_fit_cubist_spline <-
  workflow(recipe_spec_spline, model_spec_cubist) %>%
  fit(data = training(splits))

set.seed(123)
workflow_fit_cubist_lag <- fit(
  update_recipe(workflow_fit_cubist_spline, recipe_spec_lag),
  data = training(splits)
)
# Multilayer perceptron (nnet engine), fitted with the spline and lag recipes.
# The seed is reset before each fit.
model_spec_nnet <- set_engine(
  mlp(
    mode = "regression",
    hidden_units = 9,
    penalty = 1,
    epochs = 100
  ),
  "nnet"
)

set.seed(123)
workflow_fit_nnet_spline <- workflow(recipe_spec_spline, model_spec_nnet) %>%
  fit(data = training(splits))

set.seed(123)
workflow_fit_nnet_lag <- fit(
  update_recipe(workflow_fit_nnet_spline, recipe_spec_lag),
  data = training(splits)
)
# NNETAR with fixed hyper-parameters and the calendar recipe.
# Rows containing NA are dropped from the training split before fitting.
model_spec_nnetar <- set_engine(
  nnetar_reg(
    non_seasonal_ar = 3,
    seasonal_ar = 1,
    hidden_units = 9,
    penalty = 10,
    num_networks = 10,
    epochs = 50
  ),
  "nnetar"
)

set.seed(123)
workflow_fit_nnetar_base <-
  workflow(recipe_calendar_spec, model_spec_nnetar) %>%
  fit(data = drop_na(training(splits)))
## frequency = 8.5 observations per 1 year
# Prophet + XGBoost with all built-in Prophet seasonalities switched off.
# `counts = FALSE` is passed to the engine together with a fractional
# mtry (0.75).
model_spec_prophet_boost <- set_engine(
  prophet_boost(
    changepoint_num = 25,
    changepoint_range = 0.8,
    seasonality_daily = FALSE,
    seasonality_weekly = FALSE,
    seasonality_yearly = FALSE,
    mtry = 0.75,
    min_n = 20,
    tree_depth = 3,
    learn_rate = 0.2,
    loss_reduction = 0.15,
    trees = 300
  ),
  "prophet_xgboost",
  counts = FALSE
)

set.seed(123)
workflow_fit_prophet_boost <-
  workflow(recipe_calendar_spec_nolag, model_spec_prophet_boost) %>%
  fit(data = training(splits))
# ARIMA + XGBoost on the auto_arima_xgboost engine. The fit reuses the
# prophet-boost workflow (and therefore its recipe) and swaps only the model.
model_spec_arima_boost <- set_engine(
  arima_boost(
    non_seasonal_ar = 1,
    non_seasonal_differences = 1,
    mtry = 0.75,
    min_n = 20,
    tree_depth = 3,
    learn_rate = 0.25,
    loss_reduction = 0.15,
    trees = 300
  ),
  "auto_arima_xgboost",
  counts = FALSE
)

set.seed(123)
workflow_fit_arima_boost <- fit(
  update_model(workflow_fit_prophet_boost, model_spec_arima_boost),
  data = training(splits)
)
## frequency = 11 observations per 1 year
# Collect all 42 fitted workflows into a single modeltime table.
# The listing order matters: the descriptions assigned further down are keyed
# by position, so adding, removing or reordering a model here requires
# renumbering every update_modeltime_description() call.
models_tbl <- modeltime_table(
workflow_lm_spline_nospecial_fit,
workflow_lm_lag_nospecial_fit,
workflow_lm_spline_fit,
workflow_lm_lag_fit,
workflow_fit_arima_non_seasonal_ar,
workflow_fit_arima_non_seasonal_differences,
workflow_fit_arima_non_seasonal_ma,
workflow_fit_arima_seasonal,
workflow_fit_arima_seasonal_differences,
workflow_fit_arima_seasonal_ma,
workflow_fit_arima_seasonal_2,
workflow_fit_auto_arima_baseline,
workflow_fit_auto_arima_fourier,
workflow_fit_auto_arima_fourier_events,
workflow_fit_auto_arima_label,
workflow_fit_prophet,
workflow_fit_prophet_xregs,
workflow_fit_ets,
workflow_fit_tbats,
workflow_fit_stlm_ets,
workflow_fit_stlm_arima,
workflow_fit_stlm_arima_xregs,
workflow_fit_mars_simple,
workflow_fit_mars_spline,
workflow_fit_mars_lag,
workflow_fit_svm_poly_spline,
workflow_fit_svm_poly_lag,
workflow_fit_svm_rbf_spline,
workflow_fit_svm_rbf_lag,
workflow_fit_knn_spline,
workflow_fit_knn_lag,
workflow_fit_rf_spline,
workflow_fit_rf_lag,
workflow_fit_xgboost_spline,
workflow_fit_xgboost_lag,
workflow_fit_cubist_spline,
workflow_fit_cubist_lag,
workflow_fit_nnet_spline,
workflow_fit_nnet_lag,
workflow_fit_nnetar_base,
workflow_fit_arima_boost,
workflow_fit_prophet_boost
) %>%
# Descriptions follow the pattern "<FAMILY>_<workflow variable name>"; the
# part before "_workflow" is extracted later with a regex to group models by
# family, so keep the prefix convention intact.
update_modeltime_description(1, "LM_workflow_lm_spline_nospecial_fit") %>%
update_modeltime_description(2, "LM_workflow_lm_lag_nospecial_fit") %>%
update_modeltime_description(3, "LM_workflow_lm_spline_fit") %>%
update_modeltime_description(4, "LM_workflow_lm_lag_fit") %>%
update_modeltime_description(5, "ARIMA_workflow_fit_arima_non_seasonal_ar") %>%
update_modeltime_description(6, "ARIMA_workflow_fit_arima_non_seasonal_differences") %>%
update_modeltime_description(7, "ARIMA_workflow_fit_arima_non_seasonal_ma") %>%
update_modeltime_description(8, "ARIMA_workflow_fit_arima_seasonal") %>%
update_modeltime_description(9, "ARIMA_workflow_fit_arima_seasonal_differences") %>%
update_modeltime_description(10, "ARIMA_workflow_fit_arima_seasonal_ma") %>%
update_modeltime_description(11, "ARIMA_workflow_fit_arima_seasonal_2") %>%
update_modeltime_description(12, "ARIMA_workflow_fit_auto_arima_baseline") %>%
update_modeltime_description(13, "ARIMA_workflow_fit_auto_arima_fourier") %>%
update_modeltime_description(14, "ARIMA_workflow_fit_auto_arima_fourier_events") %>%
update_modeltime_description(15, "ARIMA_workflow_fit_auto_arima_label") %>%
update_modeltime_description(16, "PROPHET_workflow_fit_prophet") %>%
update_modeltime_description(17, "PROPHET_workflow_fit_prophet_xregs") %>%
update_modeltime_description(18, "ES_workflow_fit_ets") %>%
update_modeltime_description(19, "ES_workflow_fit_tbats") %>%
update_modeltime_description(20, "ES_workflow_fit_stlm_ets") %>%
update_modeltime_description(21, "ES_workflow_fit_stlm_arima") %>%
update_modeltime_description(22, "ES_workflow_fit_stlm_arima_xregs") %>%
update_modeltime_description(23, "MARS_workflow_fit_mars_simple") %>%
update_modeltime_description(24, "MARS_workflow_fit_mars_spline") %>%
update_modeltime_description(25, "MARS_workflow_fit_mars_lag") %>%
update_modeltime_description(26, "SVM_workflow_fit_svm_poly_spline") %>%
update_modeltime_description(27, "SVM_workflow_fit_svm_poly_lag") %>%
update_modeltime_description(28, "SVM_workflow_fit_svm_rbf_spline") %>%
update_modeltime_description(29, "SVM_workflow_fit_svm_rbf_lag") %>%
update_modeltime_description(30, "KNN_workflow_fit_knn_spline") %>%
update_modeltime_description(31, "KNN_workflow_fit_knn_lag") %>%
update_modeltime_description(32, "RF_workflow_fit_rf_spline") %>%
update_modeltime_description(33, "RF_workflow_fit_rf_lag") %>%
update_modeltime_description(34, "XGBOOST_workflow_fit_xgboost_spline") %>%
update_modeltime_description(35, "XGBOOST_workflow_fit_xgboost_lag") %>%
update_modeltime_description(36, "CUBIST_workflow_fit_cubist_spline") %>%
update_modeltime_description(37, "CUBIST_workflow_fit_cubist_lag") %>%
update_modeltime_description(38, "NNET_workflow_fit_nnet_spline") %>%
update_modeltime_description(39, "NNET_workflow_fit_nnet_lag") %>%
update_modeltime_description(40, "NNET_workflow_fit_nnetar_base") %>%
update_modeltime_description(41, "BOOST_workflow_fit_arima_boost") %>%
update_modeltime_description(42, "BOOST_workflow_fit_prophet_boost")
# Calibrate every model in the table against the testing split.
calibration_tbl <- modeltime_calibrate(
  models_tbl,
  new_data = testing(splits)
)
# Forecast over the testing split and plot the result on top of the full
# prepared series.
test_forecast_plot <- function(tbl) {
  plot_modeltime_forecast(
    modeltime_forecast(
      tbl,
      new_data = testing(splits),
      actual_data = data_prepared_tbl
    )
  )
}
test_forecast_plot(calibration_tbl)
# Accuracy metrics for every calibrated model, in model-id order.
modeltime_accuracy(calibration_tbl)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## # A tibble: 42 × 9
## .model_id .model_desc .type mae mape mase smape rmse rsq
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 LM_workflow_lm_spline… Test 0.193 208. 1.05 157. 0.213 0.0507
## 2 2 LM_workflow_lm_lag_no… Test 0.140 150. 0.761 144. 0.165 0.0507
## 3 3 LM_workflow_lm_spline… Test 0.193 208. 1.05 157. 0.213 0.0507
## 4 4 LM_workflow_lm_lag_fit Test 0.140 150. 0.761 144. 0.165 0.0507
## 5 5 ARIMA_workflow_fit_ar… Test 0.0845 85.8 0.460 142. 0.0943 0.0389
## 6 6 ARIMA_workflow_fit_ar… Test 0.0805 84.8 0.438 116. 0.0916 0.157
## 7 7 ARIMA_workflow_fit_ar… Test 0.136 155. 0.740 105. 0.163 0.0292
## 8 8 ARIMA_workflow_fit_ar… Test 0.139 159. 0.756 106. 0.166 0.0273
## 9 9 ARIMA_workflow_fit_ar… Test 0.131 175. 0.713 105. 0.148 0.666
## 10 10 ARIMA_workflow_fit_ar… Test 0.361 408. 1.97 148. 0.380 0.0135
## # … with 32 more rows
# Full accuracy table, best MAE first; n = Inf prints every row.
print(
  arrange(modeltime_accuracy(calibration_tbl), mae),
  n = Inf
)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## # A tibble: 42 × 9
## .model_id .model_desc .type mae mape mase smape rmse rsq
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 38 NNET_workflow_fit… Test 0.0607 71.2 0.330 80.8 0.0856 0.959
## 2 21 ES_workflow_fit_s… Test 0.0675 65.8 0.367 107. 0.0911 0.575
## 3 22 ES_workflow_fit_s… Test 0.0675 65.8 0.367 107. 0.0911 0.575
## 4 32 RF_workflow_fit_r… Test 0.0717 69.4 0.390 121. 0.0851 0.148
## 5 12 ARIMA_workflow_fi… Test 0.0729 75.0 0.397 91.8 0.0943 NA
## 6 41 BOOST_workflow_fi… Test 0.0767 83.0 0.417 89.9 0.100 NA
## 7 20 ES_workflow_fit_s… Test 0.0775 77.6 0.422 130. 0.101 0.575
## 8 19 ES_workflow_fit_t… Test 0.0793 83.1 0.432 115. 0.0908 NA
## 9 6 ARIMA_workflow_fi… Test 0.0805 84.8 0.438 116. 0.0916 0.157
## 10 5 ARIMA_workflow_fi… Test 0.0845 85.8 0.460 142. 0.0943 0.0389
## 11 25 MARS_workflow_fit… Test 0.0897 97.5 0.488 124. 0.102 0.436
## 12 33 RF_workflow_fit_r… Test 0.121 139. 0.659 155. 0.134 0.0773
## 13 9 ARIMA_workflow_fi… Test 0.131 175. 0.713 105. 0.148 0.666
## 14 24 MARS_workflow_fit… Test 0.133 152. 0.725 135. 0.169 0.0507
## 15 7 ARIMA_workflow_fi… Test 0.136 155. 0.740 105. 0.163 0.0292
## 16 8 ARIMA_workflow_fi… Test 0.139 159. 0.756 106. 0.166 0.0273
## 17 2 LM_workflow_lm_la… Test 0.140 150. 0.761 144. 0.165 0.0507
## 18 4 LM_workflow_lm_la… Test 0.140 150. 0.761 144. 0.165 0.0507
## 19 28 SVM_workflow_fit_… Test 0.143 171. 0.780 104. 0.182 0.728
## 20 34 XGBOOST_workflow_… Test 0.144 152. 0.784 120. 0.153 0.754
## 21 36 CUBIST_workflow_f… Test 0.157 173. 0.857 94.7 0.224 0.328
## 22 35 XGBOOST_workflow_… Test 0.173 188. 0.941 150. 0.197 0.00298
## 23 31 KNN_workflow_fit_… Test 0.182 234. 0.992 115. 0.217 0.179
## 24 29 SVM_workflow_fit_… Test 0.189 256. 1.03 114. 0.230 0.00668
## 25 1 LM_workflow_lm_sp… Test 0.193 208. 1.05 157. 0.213 0.0507
## 26 3 LM_workflow_lm_sp… Test 0.193 208. 1.05 157. 0.213 0.0507
## 27 13 ARIMA_workflow_fi… Test 0.201 222. 1.10 141. 0.210 0.960
## 28 18 ES_workflow_fit_e… Test 0.201 247. 1.10 199. 0.219 0.357
## 29 11 ARIMA_workflow_fi… Test 0.207 221. 1.13 126. 0.225 0.289
## 30 30 KNN_workflow_fit_… Test 0.239 283. 1.30 130. 0.262 0.726
## 31 14 ARIMA_workflow_fi… Test 0.271 301. 1.48 148. 0.277 0.919
## 32 40 NNET_workflow_fit… Test 0.308 374. 1.68 143. 0.322 0.00201
## 33 39 NNET_workflow_fit… Test 0.355 430. 1.93 147. 0.370 0.113
## 34 10 ARIMA_workflow_fi… Test 0.361 408. 1.97 148. 0.380 0.0135
## 35 37 CUBIST_workflow_f… Test 0.411 437. 2.24 176. 0.439 0.0368
## 36 42 BOOST_workflow_fi… Test 0.707 822. 3.85 170. 0.712 0.0907
## 37 17 PROPHET_workflow_… Test 0.736 849. 4.01 170. 0.748 0.215
## 38 16 PROPHET_workflow_… Test 0.793 924. 4.31 172. 0.804 0.611
## 39 23 MARS_workflow_fit… Test 0.893 1033. 4.86 176. 0.897 0.0907
## 40 27 SVM_workflow_fit_… Test 1.43 1577. 7.80 184. 1.46 0.0587
## 41 15 ARIMA_workflow_fi… Test 2.34 2615. 12.7 196. 2.40 0.473
## 42 26 SVM_workflow_fit_… Test 4.40 5120. 24.0 198. 4.41 0.908
# Persist the calibrated model table.
calibration_tbl %>%
  write_rds("data/calibration_tbl.rds")
Best model for each model type:
# Best model per family by RMSE. The family is the part of the description
# before "_model"/"_workflow"; rows are sorted by rmse first so the first row
# in each group is that family's best.
rmse_models <- modeltime_accuracy(calibration_tbl) %>%
  arrange(rmse) %>%
  mutate(
    .model_type = str_extract(
      .model_desc,
      "(.+)_(model|workflow)(.+)",
      group = 1
    )
  ) %>%
  group_by(.model_type) %>%
  summarise(across(c(.model_id, .model_desc, mae, rmse, rsq), first))
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Best model per family by MAE. The family is the part of the description
# before "_model"/"_workflow"; rows are sorted by mae first so the first row
# in each group is that family's best.
mae_models <- modeltime_accuracy(calibration_tbl) %>%
  arrange(mae) %>%
  mutate(
    .model_type = str_extract(
      .model_desc,
      "(.+)_(model|workflow)(.+)",
      group = 1
    )
  ) %>%
  group_by(.model_type) %>%
  summarise(across(c(.model_id, .model_desc, mae, rmse, rsq), first))
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Union of the per-family winners under both metrics, ordered by family.
# Families where the same model wins on both metrics collapse to one row.
arrange(
  distinct(bind_rows(rmse_models, mae_models)),
  .model_type
)
## # A tibble: 14 × 6
## .model_type .model_id .model_desc mae rmse rsq
## <chr> <int> <chr> <dbl> <dbl> <dbl>
## 1 ARIMA 6 ARIMA_workflow_fit_arima_non_sea… 0.0805 0.0916 0.157
## 2 ARIMA 12 ARIMA_workflow_fit_auto_arima_ba… 0.0729 0.0943 NA
## 3 BOOST 41 BOOST_workflow_fit_arima_boost 0.0767 0.100 NA
## 4 CUBIST 36 CUBIST_workflow_fit_cubist_spline 0.157 0.224 0.328
## 5 ES 19 ES_workflow_fit_tbats 0.0793 0.0908 NA
## 6 ES 21 ES_workflow_fit_stlm_arima 0.0675 0.0911 0.575
## 7 KNN 31 KNN_workflow_fit_knn_lag 0.182 0.217 0.179
## 8 LM 2 LM_workflow_lm_lag_nospecial_fit 0.140 0.165 0.0507
## 9 MARS 25 MARS_workflow_fit_mars_lag 0.0897 0.102 0.436
## 10 NNET 38 NNET_workflow_fit_nnet_spline 0.0607 0.0856 0.959
## 11 PROPHET 17 PROPHET_workflow_fit_prophet_xre… 0.736 0.748 0.215
## 12 RF 32 RF_workflow_fit_rf_spline 0.0717 0.0851 0.148
## 13 SVM 28 SVM_workflow_fit_svm_rbf_spline 0.143 0.182 0.728
## 14 XGBOOST 34 XGBOOST_workflow_fit_xgboost_spl… 0.144 0.153 0.754
# Same union of per-family winners, ordered by MAE.
arrange(
  distinct(bind_rows(rmse_models, mae_models)),
  mae
)
## # A tibble: 14 × 6
## .model_type .model_id .model_desc mae rmse rsq
## <chr> <int> <chr> <dbl> <dbl> <dbl>
## 1 NNET 38 NNET_workflow_fit_nnet_spline 0.0607 0.0856 0.959
## 2 ES 21 ES_workflow_fit_stlm_arima 0.0675 0.0911 0.575
## 3 RF 32 RF_workflow_fit_rf_spline 0.0717 0.0851 0.148
## 4 ARIMA 12 ARIMA_workflow_fit_auto_arima_ba… 0.0729 0.0943 NA
## 5 BOOST 41 BOOST_workflow_fit_arima_boost 0.0767 0.100 NA
## 6 ES 19 ES_workflow_fit_tbats 0.0793 0.0908 NA
## 7 ARIMA 6 ARIMA_workflow_fit_arima_non_sea… 0.0805 0.0916 0.157
## 8 MARS 25 MARS_workflow_fit_mars_lag 0.0897 0.102 0.436
## 9 LM 2 LM_workflow_lm_lag_nospecial_fit 0.140 0.165 0.0507
## 10 SVM 28 SVM_workflow_fit_svm_rbf_spline 0.143 0.182 0.728
## 11 XGBOOST 34 XGBOOST_workflow_fit_xgboost_spl… 0.144 0.153 0.754
## 12 CUBIST 36 CUBIST_workflow_fit_cubist_spline 0.157 0.224 0.328
## 13 KNN 31 KNN_workflow_fit_knn_lag 0.182 0.217 0.179
## 14 PROPHET 17 PROPHET_workflow_fit_prophet_xre… 0.736 0.748 0.215
# c(38, 12, 21, 41, 32)
Residuals
# Residuals from the calibration table with no new data supplied.
residuals_out_tbl <- modeltime_residuals(calibration_tbl)

plot_modeltime_residuals(
  residuals_out_tbl,
  .y_intercept = 0,
  .y_intercept_color = "blue"
)

# Residuals on the training split, with rows containing NA dropped.
residuals_in_tbl <- modeltime_residuals(
  calibration_tbl,
  drop_na(training(splits))
)

plot_modeltime_residuals(
  residuals_in_tbl,
  .y_intercept = 0,
  .y_intercept_color = "blue"
)

# Autocorrelation view of the training-split residuals.
plot_modeltime_residuals(residuals_in_tbl, .type = "acf")
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
## Max lag exceeds data available. Using max lag: 30
# Seasonality view of the training-split residuals.
plot_modeltime_residuals(residuals_in_tbl, .type = "seasonality")
## Warning: Removed 24 rows containing non-finite values (`stat_boxplot()`).
# Tunable NNETAR spec: seasonal period fixed at 3, four hyper-parameters left
# open for tuning. This overwrites the fixed-parameter `model_spec_nnetar`.
model_spec_nnetar <- set_engine(
  nnetar_reg(
    seasonal_period = 3,
    non_seasonal_ar = tune(id = "non_seasonal_ar"),
    seasonal_ar = tune(),
    hidden_units = tune(),
    num_networks = 10,
    penalty = tune(),
    epochs = 50
  ),
  "nnetar"
)

# List the parameters that are marked for tuning.
model_spec_nnetar %>%
  extract_parameter_set_dials()
## Collection of 4 parameters for tuning
##
## identifier type object
## non_seasonal_ar non_seasonal_ar nparam[+]
## seasonal_ar seasonal_ar nparam[+]
## hidden_units hidden_units nparam[+]
## penalty penalty nparam[+]
# Round 1: 15-point Latin hypercube over the default ranges of the spec's
# tunable parameters.
set.seed(123)
grid_spec_nnetar_1 <- model_spec_nnetar %>%
  extract_parameter_set_dials() %>%
  grid_latin_hypercube(size = 15)

grid_spec_nnetar_1
## # A tibble: 15 × 4
## non_seasonal_ar seasonal_ar hidden_units penalty
## <int> <int> <int> <dbl>
## 1 3 2 9 7.53e- 5
## 2 0 1 8 6.40e- 7
## 3 4 1 7 4.43e-10
## 4 5 1 3 8.92e- 3
## 5 5 0 4 6.29e- 6
## 6 3 2 8 9.89e- 2
## 7 0 1 10 2.35e- 2
## 8 4 0 4 2.20e- 9
## 9 2 1 6 1.97e- 6
## 10 3 1 1 8.01e-10
## 11 1 2 9 5.45e- 4
## 12 2 0 2 6.28e- 1
## 13 1 1 5 1.45e- 7
## 14 1 0 2 4.37e- 8
## 15 2 2 5 1.21e- 4
# Round 2: 15-point Latin hypercube over hand-narrowed ranges.
# seasonal_ar is pinned to 1; the penalty range is in log10 units.
set.seed(123)
grid_spec_nnetar_2 <- grid_latin_hypercube(
  non_seasonal_ar(range = c(1, 4)),
  seasonal_ar(range = c(1, 1)),
  hidden_units(range = c(4, 6)),
  penalty(
    range = c(-8, -5),
    trans = scales::log10_trans()
  ),
  size = 15
)

grid_spec_nnetar_2
## # A tibble: 15 × 4
## non_seasonal_ar seasonal_ar hidden_units penalty
## <int> <int> <int> <dbl>
## 1 3 1 6 0.000000580
## 2 1 1 5 0.000000139
## 3 3 1 5 0.0000000156
## 4 4 1 4 0.00000243
## 5 4 1 5 0.000000275
## 6 3 1 5 0.00000499
## 7 1 1 6 0.00000324
## 8 3 1 5 0.0000000253
## 9 2 1 5 0.000000194
## 10 3 1 4 0.0000000187
## 11 2 1 6 0.00000105
## 12 2 1 4 0.00000870
## 13 2 1 5 0.0000000889
## 14 2 1 4 0.0000000620
## 15 2 1 5 0.000000669
# Tuning workflow: start from the base NNETAR workflow, then swap in the lag
# calendar recipe and the tunable model spec.
wflw_tune_nnetar <- update_model(
  update_recipe(workflow_fit_nnetar_base, recipe_calendar_spec_lag),
  model_spec_nnetar
)
# nnetar_tune
# Grid-search the round-2 grid on the time-series CV resamples, timing the
# whole step (search, plot and ranking) with tictoc.
tictoc::tic()

set.seed(123)
tune_results_nnetar_2 <- tune_grid(
  wflw_tune_nnetar,
  resamples = resamples_tscv_lag,
  grid = grid_spec_nnetar_2,
  metrics = default_forecast_accuracy_metric_set(),
  control = control_grid(verbose = TRUE, save_pred = TRUE)
)

# Interactive view of the tuning results with a smoother per panel.
g <- autoplot(tune_results_nnetar_2) +
  geom_smooth(se = FALSE)
ggplotly(g)

# Every candidate ranked by RMSE.
show_best(tune_results_nnetar_2, metric = "rmse", n = Inf)

tictoc::toc()
# nnetar_finalize
# Plug the best-RMSE candidate into the tuning workflow, refit on the training
# split and save the fitted workflow. select_best() returns the top-ranked
# parameter combination directly, replacing show_best(n = Inf) + slice(1).
workflow_fit_nnetar_tscv <- wflw_tune_nnetar %>%
  finalize_workflow(
    select_best(tune_results_nnetar_2, metric = "rmse")
  ) %>%
  fit(training(splits))

write_rds(workflow_fit_nnetar_tscv, "data/workflow_fit_nnetar_tscv.rds")
GLMNet XGBoost Prophet PROPHET BOOST
# Tunable Prophet + XGBoost spec: Prophet seasonalities off, trees fixed at
# 300, five boosting parameters left open for tuning. This overwrites the
# fixed-parameter `model_spec_prophet_boost`.
model_spec_prophet_boost <- set_engine(
  prophet_boost(
    changepoint_num = 24,
    changepoint_range = 0.8,
    seasonality_yearly = FALSE,
    seasonality_weekly = FALSE,
    seasonality_daily = FALSE,
    mtry = tune(),
    trees = 300,
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune(),
    loss_reduction = tune()
  ),
  "prophet_xgboost"
)
# Round 1: 15-point Latin hypercube over the spec's tunable parameters.
# mtry has no default upper bound, so its range is set explicitly.
# extract_parameter_set_dials() replaces the deprecated parameters() call.
set.seed(123)
grid_spec_prophet_boost_1 <- grid_latin_hypercube(
  extract_parameter_set_dials(model_spec_prophet_boost) %>%
    update(
      mtry = mtry(range = c(1, 65))
    ),
  size = 15
)
## Warning: `parameters.model_spec()` was deprecated in tune 0.1.6.9003.
## ℹ Please use `hardhat::extract_parameter_set_dials()` instead.
# Round 2: 15-point Latin hypercube over hand-set ranges.
# learn_rate and loss_reduction ranges are in log10 units.
set.seed(123)
grid_spec_prophet_boost_2 <- grid_latin_hypercube(
  mtry(range = c(2, 50)),
  min_n(range = c(1, 13)),
  tree_depth(range = c(2, 12)),
  learn_rate(
    range = c(-3, -1),
    trans = scales::log10_trans()
  ),
  loss_reduction(
    range = c(-5, 1),
    trans = scales::log10_trans()
  ),
  size = 15
)
# Round 3: 15-point Latin hypercube over further narrowed ranges.
# learn_rate and loss_reduction ranges are in log10 units; the transformation
# is spelled out for learn_rate too, so it reads the same as round 2.
set.seed(123)
grid_spec_prophet_boost_3 <- grid_latin_hypercube(
  mtry(range = c(9, 36)),
  min_n(range = c(1, 5)),
  tree_depth(range = c(2, 12)),
  learn_rate(range = c(-2.5, -1.7), trans = scales::log10_trans()),
  loss_reduction(range = c(-5, 1), trans = scales::log10_trans()),
  size = 15
)
# prophet_boost_finalize
# Finalise the tunable Prophet + XGBoost spec with the best-RMSE candidate,
# refit on the training split and save the fitted workflow.
# select_best() replaces the hand-rolled show_best() + slice(1).
# NOTE(review): `tune2_results_prophet_boost_kfold` is not created in this
# part of the script; it must exist in the session before this runs.
set.seed(123)
workflow_fit_prophet_boost_kfold_rmse <- workflow_fit_prophet_boost %>%
  update_model(model_spec_prophet_boost) %>%
  finalize_workflow(
    select_best(tune2_results_prophet_boost_kfold, metric = "rmse")
  ) %>%
  fit(training(splits))

write_rds(
  workflow_fit_prophet_boost_kfold_rmse,
  "data/workflow_fit_prophet_boost_kfold_rmse.rds"
)
# Finalise the tunable Prophet + XGBoost spec with the best-R-squared
# candidate, refit on the training split and save the fitted workflow.
# select_best() replaces the hand-rolled show_best() + slice(1).
# NOTE(review): `tune2_results_prophet_boost_kfold` is not created in this
# part of the script; it must exist in the session before this runs.
set.seed(123)
workflow_fit_prophet_boost_kfold_rsq <- workflow_fit_prophet_boost %>%
  update_model(model_spec_prophet_boost) %>%
  finalize_workflow(
    select_best(tune2_results_prophet_boost_kfold, metric = "rsq")
  ) %>%
  fit(training(splits))

write_rds(
  workflow_fit_prophet_boost_kfold_rsq,
  "data/workflow_fit_prophet_boost_kfold_rsq.rds"
)
TODO: Pass workflows (rather than bare models) to `fit_resamples()` for every model to get their cross-validated accuracy.
# cross_validate
# Reload the tuned workflows saved to disk by the finalize steps.
workflow_fit_nnetar_tscv <-
  read_rds(file = "data/workflow_fit_nnetar_tscv.rds")
workflow_fit_prophet_boost_kfold_rmse <-
  read_rds(file = "data/workflow_fit_prophet_boost_kfold_rmse.rds")
workflow_fit_prophet_boost_kfold_rsq <-
  read_rds(file = "data/workflow_fit_prophet_boost_kfold_rsq.rds")
# Workflow set of the models selected for cross-validation.
# Each argument name becomes the workflow's id in the set.
my_models <- as_workflow_set(
  # Models fitted earlier with fixed hyper-parameters
  lm_lag_nospecial_fit           = workflow_lm_lag_nospecial_fit,
  arima_non_seasonal_differences = workflow_fit_arima_non_seasonal_differences,
  auto_arima_baseline            = workflow_fit_auto_arima_baseline,
  prophet                        = workflow_fit_prophet,
  ets                            = workflow_fit_ets,
  stlm_arima                     = workflow_fit_stlm_arima,
  mars_lag                       = workflow_fit_mars_lag,
  svm_rbf_spline                 = workflow_fit_svm_rbf_spline,
  knn_lag                        = workflow_fit_knn_lag,
  rf_spline                      = workflow_fit_rf_spline,
  xgboost_spline                 = workflow_fit_xgboost_spline,
  cubist_lag                     = workflow_fit_cubist_lag,
  nnet_spline                    = workflow_fit_nnet_spline,
  nnetar_base                    = workflow_fit_nnetar_base,
  arima_boost                    = workflow_fit_arima_boost,
  # Tuned workflows reloaded from disk
  nnetar_tscv                    = workflow_fit_nnetar_tscv,
  prophet_boost_kfold_rmse       = workflow_fit_prophet_boost_kfold_rmse,
  prophet_boost_kfold_rsq        = workflow_fit_prophet_boost_kfold_rsq
)
my_models_res <- my_models %>%
# The first argument is a function name from the {{tune}} package
# such as `tune_grid()`, `fit_resamples()`, etc.
workflow_map("fit_resamples",
resamples = resamples_tscv_lag,
metrics = default_forecast_accuracy_metric_set(),
verbose = TRUE
)
write_rds(my_models_res, "data/my_models_res.rds")
# Reload the cached resampling results and show one row per workflow with one
# column per metric, best (lowest) MAE first.
my_models_res <- read_rds("data/my_models_res.rds")
my_models_res %>%
  collect_metrics() %>%
  # `id_cols` is passed by name: from tidyr 1.3.0 onwards the second positional
  # slot of pivot_wider() is `...`, so a bare `wflow_id` there is rejected.
  pivot_wider(id_cols = wflow_id, names_from = .metric, values_from = mean) %>%
  arrange(mae)
## # A tibble: 18 × 7
## wflow_id mae mape mase rmse rsq smape
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto_arima_baseline 0.289 277. 2.38 0.326 NaN 77.0
## 2 arima_non_seasonal_differences 0.299 277. 2.64 0.334 0.392 77.7
## 3 arima_boost 0.306 294. 2.66 0.343 NaN 77.8
## 4 rf_spline 0.324 183. 3.56 0.361 0.615 82.3
## 5 xgboost_spline 0.330 200. 3.23 0.363 0.553 83.2
## 6 lm_lag_nospecial_fit 0.342 77.8 3.53 0.380 0.661 82.5
## 7 nnetar_base 0.346 236. 2.92 0.367 0.764 104.
## 8 knn_lag 0.378 375. 3.04 0.410 0.681 100.
## 9 nnet_spline 0.416 331. 3.77 0.458 0.408 89.9
## 10 prophet_boost_kfold_rmse 0.424 375. 3.81 0.465 0.604 88.9
## 11 svm_rbf_spline 0.455 164. 4.35 0.489 0.486 157.
## 12 stlm_arima 0.481 463. 4.06 0.543 0.711 91.6
## 13 prophet 0.518 674. 3.86 0.585 0.687 81.5
## 14 cubist_lag 0.608 497. 6.48 0.654 0.642 111.
## 15 ets 0.638 454. 5.78 0.694 0.484 116.
## 16 prophet_boost_kfold_rsq 0.688 647. 6.14 0.729 0.574 99.0
## 17 nnetar_tscv 0.724 701. 7.57 0.775 0.390 104.
## 18 mars_lag 0.734 1168. 4.83 0.849 0.703 126.
# Reload the tuned workflows that were cached to disk.
workflow_fit_nnetar_tscv <- read_rds("data/workflow_fit_nnetar_tscv.rds")
workflow_fit_prophet_boost_kfold_rmse <- read_rds("data/workflow_fit_prophet_boost_kfold_rmse.rds")
workflow_fit_prophet_boost_kfold_rsq <- read_rds("data/workflow_fit_prophet_boost_kfold_rsq.rds")

# Collect all fitted workflows in one modeltime table and label each row with
# the name of the object it came from. The i-th label below belongs to the
# i-th workflow passed to modeltime_table(), so keep both lists in step.
models_tbl <- local({
  model_labels <- c(
    "workflow_lm_lag_nospecial_fit",
    "workflow_fit_arima_non_seasonal_differences",
    "workflow_fit_auto_arima_baseline",
    "workflow_fit_prophet",
    "workflow_fit_ets",
    "workflow_fit_stlm_arima",
    "workflow_fit_mars_lag",
    "workflow_fit_svm_rbf_spline",
    "workflow_fit_knn_lag",
    "workflow_fit_rf_spline",
    "workflow_fit_xgboost_spline",
    "workflow_fit_cubist_lag",
    "workflow_fit_nnet_spline",
    "workflow_fit_nnetar_base",
    "workflow_fit_arima_boost",
    "workflow_fit_nnetar_tscv",
    "workflow_fit_prophet_boost_kfold_rmse",
    "workflow_fit_prophet_boost_kfold_rsq"
  )

  unlabelled_tbl <- modeltime_table(
    workflow_lm_lag_nospecial_fit,
    workflow_fit_arima_non_seasonal_differences,
    workflow_fit_auto_arima_baseline,
    workflow_fit_prophet,
    workflow_fit_ets,
    workflow_fit_stlm_arima,
    workflow_fit_mars_lag,
    workflow_fit_svm_rbf_spline,
    workflow_fit_knn_lag,
    workflow_fit_rf_spline,
    workflow_fit_xgboost_spline,
    workflow_fit_cubist_lag,
    workflow_fit_nnet_spline,
    workflow_fit_nnetar_base,
    workflow_fit_arima_boost,
    workflow_fit_nnetar_tscv,
    workflow_fit_prophet_boost_kfold_rmse,
    workflow_fit_prophet_boost_kfold_rsq
  )

  # Apply the labels one model id at a time, in order 1..18.
  purrr::reduce(
    seq_along(model_labels),
    function(tbl, id) {
      update_modeltime_description(tbl, id, model_labels[[id]])
    },
    .init = unlabelled_tbl
  )
})
# Calibrate every model on the held-out test split, then refit the calibrated
# models on the full prepared data set.
calibration_tbl <- modeltime_calibrate(models_tbl, new_data = testing(splits))
refit_tbl <- modeltime_refit(calibration_tbl, data = data_prepared_tbl)
## frequency = 11 observations per 1 year
## frequency = 11 observations per 1 year
## frequency = 11 observations per 1 year
## Warning: 50 samples were requested but there were 34 rows in the data. 29 will
## be used.
## frequency = 11 observations per 1 year
## Warning in forecast::nnetar(y = outcome, p = p, P = P, size = size, repeats =
## repeats, : Missing values in xreg, omitting rows
## frequency = 11 observations per 1 year
# Forecast over the test rows plus the future rows, then map the value and
# interval columns back to the original scale: undo the standardisation first
# and the log-interval transform second.
# NOTE(review): the output recorded for this step reports
# "Missing data in columns: special." and missing values near the end of the
# series, so some models may not produce forecasts for the future rows --
# confirm that `forecast_tbl` carries every predictor the models need.
future_forecast_tbl <- mutate(
  modeltime_forecast(
    refit_tbl,
    new_data = bind_rows(testing(splits), forecast_tbl),
    actual_data = data_prepared_tbl
  ),
  across(
    .value:.conf_hi,
    .fns = ~ log_interval_inv_vec(
      x = standardize_inv_vec(x = .x, mean = std_mean, sd = std_sd),
      limit_lower = limit_lower,
      limit_upper = limit_upper,
      offset = offset
    )
  )
)
## Error:
## Error:
## Error: Missing data in columns: special.
## Error: I can't forecast when there are missing values near the end of the series.
## Error: I can't forecast when there are missing values near the end of the series.
# Reshape the raw data into the forecast-table layout (one "ORIGINAL" row per
# date and series, no model id or interval), stack it on top of the forecasts
# and plot everything together.
data %>%
  pivot_longer(cols = -date) %>%
  rename(.key = name, .index = date, .value = value) %>%
  mutate(
    .model_id = NA,
    .model_desc = "ORIGINAL",
    .conf_lo = NA,
    .conf_hi = NA
  ) %>%
  dplyr::select(
    .model_id, .model_desc, .key, .index, .value, .conf_lo, .conf_hi
  ) %>%
  bind_rows(future_forecast_tbl) %>%
  plot_modeltime_forecast()
# Blank out `.value` for every row dated in the March-May 2020 or March-May
# 2021 windows; rows outside both windows keep their value.
future_forecast_tbl <- mutate(
  future_forecast_tbl,
  .value = ifelse(
    !between_time(.index, start_date = "2020-03-01", end_date = "2020-05-01") &
      !between_time(.index, start_date = "2021-03-01", end_date = "2021-05-01"),
    .value,
    NA
  )
)
# Rows whose `.value` is missing are refilled with `change_yoy` looked up in
# `data` by date; the helper column is dropped again afterwards.
# NOTE(review): any other columns `data` carries would be joined in as well --
# presumably it only holds `date` and `change_yoy`; confirm.
restore_tbl <- mutate(
  left_join(
    filter(future_forecast_tbl, is.na(.value)),
    data,
    by = c(".index" = "date")
  ),
  .value = change_yoy,
  change_yoy = NULL
)

# Rebuild the forecast table: rows that still have a value first, restored
# rows appended after them.
future_forecast_tbl <- bind_rows(
  filter(future_forecast_tbl, !is.na(.value)),
  restore_tbl
)
# Plot again with the restored forecast table: the raw data is reshaped into
# the forecast-table layout as "ORIGINAL" rows and stacked on the forecasts.
data %>%
  pivot_longer(cols = -date) %>%
  rename(.key = name, .index = date, .value = value) %>%
  mutate(
    .model_id = NA,
    .model_desc = "ORIGINAL",
    .conf_lo = NA,
    .conf_hi = NA
  ) %>%
  dplyr::select(
    .model_id, .model_desc, .key, .index, .value, .conf_lo, .conf_hi
  ) %>%
  bind_rows(future_forecast_tbl) %>%
  plot_modeltime_forecast()